{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b7fc99c3",
   "metadata": {},
   "source": [
    "# Use QR-DQN to Play PongNoFrameskip-v4\n",
    "\n",
    "PyTorch version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2b23c9f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import copy\n",
    "import logging\n",
    "import itertools\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "from gym.wrappers.atari_preprocessing import AtariPreprocessing\n",
    "from gym.wrappers.frame_stack import FrameStack\n",
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "torch.manual_seed(0)\n",
    "from torch import nn\n",
    "from torch import optim\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "693bdf1e",
   "metadata": {},
   "source": [
    "Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6ab1cb62",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:11:36 [INFO] env: <AtariPreprocessing<TimeLimit<AtariEnv<PongNoFrameskip-v4>>>>\n",
      "00:11:36 [INFO] action_space: Discrete(6)\n",
      "00:11:36 [INFO] observation_space: Box([[[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]], [[[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]], (4, 84, 84), uint8)\n",
      "00:11:36 [INFO] reward_range: (-inf, inf)\n",
      "00:11:36 [INFO] metadata: {'render.modes': ['human', 'rgb_array']}\n",
      "00:11:36 [INFO] num_stack: 4\n",
      "00:11:36 [INFO] lz4_compress: False\n",
      "00:11:36 [INFO] frames: deque([], maxlen=4)\n",
      "00:11:36 [INFO] id: PongNoFrameskip-v4\n",
      "00:11:36 [INFO] entry_point: gym.envs.atari:AtariEnv\n",
      "00:11:36 [INFO] reward_threshold: None\n",
      "00:11:36 [INFO] nondeterministic: False\n",
      "00:11:36 [INFO] max_episode_steps: 400000\n",
      "00:11:36 [INFO] _kwargs: {'game': 'pong', 'obs_type': 'image', 'frameskip': 1}\n",
      "00:11:36 [INFO] _env_name: PongNoFrameskip\n"
     ]
    }
   ],
   "source": [
    "# Pong with the standard Atari preprocessing and 4 stacked frames.\n",
    "env = FrameStack(\n",
    "        AtariPreprocessing(gym.make('PongNoFrameskip-v4')), num_stack=4)\n",
    "\n",
    "# Seed the innermost environment: first its np_random, then the env itself.\n",
    "atari_env = env.env.env.unwrapped\n",
    "atari_env.np_random.seed(0) # set seed for noops\n",
    "atari_env.unwrapped.seed(0) # set seed for AtariEnv\n",
    "\n",
    "# Log every attribute of the wrapped env, then of its spec.\n",
    "for described in (env, env.spec):\n",
    "    for key, value in vars(described).items():\n",
    "        logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3747e919",
   "metadata": {},
   "source": [
    "Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5eb74be3",
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    def __init__(self, capacity):\n",
    "        self.memory = pd.DataFrame(index=range(capacity),\n",
    "                columns=['state', 'action', 'reward', 'next_state', 'done'])\n",
    "        self.i = 0\n",
    "        self.count = 0\n",
    "        self.capacity = capacity\n",
    "\n",
    "    def store(self, *args):\n",
    "        self.memory.loc[self.i] = args\n",
    "        self.i = (self.i + 1) % self.capacity\n",
    "        self.count = min(self.count + 1, self.capacity)\n",
    "\n",
    "    def sample(self, size):\n",
    "        indices = np.random.choice(self.count, size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.memory.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a2b9a656",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Agent:\n",
    "    def __init__(self, env):\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99\n",
    "        self.epsilon = 1.\n",
    "\n",
    "        self.replayer = DQNReplayer(capacity=100000)\n",
    "\n",
    "        self.quantile_count = 64\n",
    "        self.cumprob_tensor = torch.arange(1 / (2 * self.quantile_count),\n",
    "                1, 1 / self.quantile_count).view(1, -1, 1)\n",
    "\n",
    "        self.evaluate_net = nn.Sequential(\n",
    "                nn.Conv2d(4, 32, kernel_size=8, stride=4), nn.ReLU(),\n",
    "                nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(),\n",
    "                nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(),\n",
    "                nn.Flatten(),\n",
    "                nn.Linear(in_features=3136, out_features=512), nn.ReLU(),\n",
    "                nn.Linear(in_features=512,\n",
    "                out_features=self.action_n * self.quantile_count))\n",
    "        self.target_net = copy.deepcopy(self.evaluate_net)\n",
    "        self.optimizer = optim.Adam(self.evaluate_net.parameters(), lr=0.0001)\n",
    "\n",
    "        self.loss = nn.SmoothL1Loss(reduction=\"none\")\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        self.mode = mode\n",
    "        if mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        state_tensor = torch.as_tensor(observation,\n",
    "                dtype=torch.float).unsqueeze(0)\n",
    "        q_component_tensor = self.evaluate_net(state_tensor).view(-1,\n",
    "                self.action_n, self.quantile_count)\n",
    "        q_tensor = q_component_tensor.mean(2)\n",
    "        action_tensor = q_tensor.argmax(dim=1)\n",
    "        actions = action_tensor.detach().numpy()\n",
    "        action = actions[0]\n",
    "        if self.mode == 'train':\n",
    "            if np.random.rand() < self.epsilon:\n",
    "                action = np.random.randint(0, self.action_n)\n",
    "\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "            if len(self.trajectory) >= 8:\n",
    "                state, _, _, act, next_state, reward, done, _ = \\\n",
    "                        self.trajectory[-8:]\n",
    "                self.replayer.store(state, act, reward, next_state, done)\n",
    "            if self.replayer.count >= 1024 and self.replayer.count % 10 == 0:\n",
    "                self.learn()\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        pass\n",
    "\n",
    "    def update_net(self, target_net, evaluate_net, learning_rate=0.005):\n",
    "        for target_param, evaluate_param in zip(\n",
    "                target_net.parameters(), evaluate_net.parameters()):\n",
    "            target_param.data.copy_(learning_rate * evaluate_param.data\n",
    "                    + (1 - learning_rate) * target_param.data)\n",
    "\n",
    "    def learn(self):\n",
    "        # replay\n",
    "        batch_size = 32\n",
    "        states, actions, rewards, next_states, dones = \\\n",
    "                self.replayer.sample(batch_size)\n",
    "        state_tensor = torch.as_tensor(states, dtype=torch.float)\n",
    "        reward_tensor = torch.as_tensor(rewards, dtype=torch.float)\n",
    "        done_tensor = torch.as_tensor(dones, dtype=torch.float)\n",
    "        next_state_tensor = torch.as_tensor(next_states, dtype=torch.float)\n",
    "\n",
    "        # compute target\n",
    "        next_q_component_tensor = self.evaluate_net(next_state_tensor).view(\n",
    "                -1, self.action_n, self.quantile_count)\n",
    "        next_q_tensor = next_q_component_tensor.mean(2)\n",
    "        next_action_tensor = next_q_tensor.argmax(dim=1)\n",
    "        next_actions = next_action_tensor.detach().numpy()\n",
    "        all_next_q_quantile_tensor = self.target_net(next_state_tensor\n",
    "                ).view(-1, self.action_n, self.quantile_count)\n",
    "        next_q_quantile_tensor = all_next_q_quantile_tensor[\n",
    "                range(batch_size), next_actions, :]\n",
    "        target_quantile_tensor = reward_tensor.reshape(batch_size, 1) \\\n",
    "                + self.gamma * next_q_quantile_tensor \\\n",
    "                * (1. - done_tensor).reshape(-1, 1)\n",
    "\n",
    "        all_q_quantile_tensor = self.evaluate_net(state_tensor).view(-1,\n",
    "                self.action_n, self.quantile_count)\n",
    "        q_quantile_tensor = all_q_quantile_tensor[range(batch_size), actions,\n",
    "                :]\n",
    "\n",
    "        target_quantile_tensor = target_quantile_tensor.unsqueeze(1)\n",
    "        q_quantile_tensor = q_quantile_tensor.unsqueeze(2)\n",
    "        hubor_loss_tensor = self.loss(target_quantile_tensor, q_quantile_tensor)\n",
    "        comparison_tensor = (target_quantile_tensor\n",
    "                < q_quantile_tensor).detach().float()\n",
    "        quantile_regression_tensor = (self.cumprob_tensor\n",
    "                - comparison_tensor).abs()\n",
    "        quantile_huber_loss_tensor = (hubor_loss_tensor\n",
    "                * quantile_regression_tensor).sum(-1).mean(1)\n",
    "        loss_tensor = quantile_huber_loss_tensor.mean()\n",
    "        self.optimizer.zero_grad()\n",
    "        loss_tensor.backward()\n",
    "        self.optimizer.step()\n",
    "\n",
    "        self.update_net(self.target_net, self.evaluate_net)\n",
    "\n",
    "        self.epsilon = max(self.epsilon - 1e-5, 0.05)\n",
    "\n",
    "\n",
    "# Instantiate the agent for the environment `env` created earlier.\n",
    "agent = Agent(env)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f02b76e5",
   "metadata": {},
   "source": [
    "Train & Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "34ae8868",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:11:36 [INFO] ==== train ====\n",
      "00:11:57 [DEBUG] train episode 0: reward = -19.00, steps = 1014\n",
      "00:12:33 [DEBUG] train episode 1: reward = -19.00, steps = 1020\n",
      "00:13:04 [DEBUG] train episode 2: reward = -21.00, steps = 878\n",
      "00:13:36 [DEBUG] train episode 3: reward = -20.00, steps = 840\n",
      "00:14:12 [DEBUG] train episode 4: reward = -20.00, steps = 1018\n",
      "00:14:49 [DEBUG] train episode 5: reward = -21.00, steps = 1066\n",
      "00:15:20 [DEBUG] train episode 6: reward = -21.00, steps = 871\n",
      "00:15:57 [DEBUG] train episode 7: reward = -19.00, steps = 1020\n",
      "00:16:34 [DEBUG] train episode 8: reward = -19.00, steps = 1006\n",
      "00:17:02 [DEBUG] train episode 9: reward = -21.00, steps = 758\n",
      "00:17:29 [DEBUG] train episode 10: reward = -21.00, steps = 763\n",
      "00:17:58 [DEBUG] train episode 11: reward = -21.00, steps = 785\n",
      "00:18:25 [DEBUG] train episode 12: reward = -21.00, steps = 757\n",
      "00:18:57 [DEBUG] train episode 13: reward = -20.00, steps = 877\n",
      "00:19:35 [DEBUG] train episode 14: reward = -20.00, steps = 1046\n",
      "00:20:04 [DEBUG] train episode 15: reward = -21.00, steps = 788\n",
      "00:20:37 [DEBUG] train episode 16: reward = -20.00, steps = 919\n",
      "00:21:15 [DEBUG] train episode 17: reward = -18.00, steps = 1012\n",
      "00:21:50 [DEBUG] train episode 18: reward = -21.00, steps = 924\n",
      "00:22:30 [DEBUG] train episode 19: reward = -20.00, steps = 1075\n",
      "00:23:02 [DEBUG] train episode 20: reward = -21.00, steps = 859\n",
      "00:23:34 [DEBUG] train episode 21: reward = -21.00, steps = 874\n",
      "00:24:08 [DEBUG] train episode 22: reward = -21.00, steps = 907\n",
      "00:24:47 [DEBUG] train episode 23: reward = -20.00, steps = 1046\n",
      "00:25:21 [DEBUG] train episode 24: reward = -21.00, steps = 847\n",
      "00:26:02 [DEBUG] train episode 25: reward = -19.00, steps = 959\n",
      "00:26:36 [DEBUG] train episode 26: reward = -21.00, steps = 788\n",
      "00:27:17 [DEBUG] train episode 27: reward = -19.00, steps = 981\n",
      "00:28:01 [DEBUG] train episode 28: reward = -19.00, steps = 1020\n",
      "00:28:36 [DEBUG] train episode 29: reward = -21.00, steps = 871\n",
      "00:29:17 [DEBUG] train episode 30: reward = -20.00, steps = 1020\n",
      "00:29:49 [DEBUG] train episode 31: reward = -21.00, steps = 876\n",
      "00:30:25 [DEBUG] train episode 32: reward = -20.00, steps = 999\n",
      "00:30:53 [DEBUG] train episode 33: reward = -21.00, steps = 778\n",
      "00:31:32 [DEBUG] train episode 34: reward = -19.00, steps = 1067\n",
      "00:32:13 [DEBUG] train episode 35: reward = -20.00, steps = 977\n",
      "00:32:43 [DEBUG] train episode 36: reward = -21.00, steps = 799\n",
      "00:33:17 [DEBUG] train episode 37: reward = -21.00, steps = 937\n",
      "00:33:49 [DEBUG] train episode 38: reward = -21.00, steps = 875\n",
      "00:34:30 [DEBUG] train episode 39: reward = -20.00, steps = 1107\n",
      "00:35:10 [DEBUG] train episode 40: reward = -20.00, steps = 1065\n",
      "00:35:47 [DEBUG] train episode 41: reward = -21.00, steps = 968\n",
      "00:36:18 [DEBUG] train episode 42: reward = -20.00, steps = 831\n",
      "00:37:02 [DEBUG] train episode 43: reward = -20.00, steps = 1127\n",
      "00:37:46 [DEBUG] train episode 44: reward = -21.00, steps = 1128\n",
      "00:38:22 [DEBUG] train episode 45: reward = -21.00, steps = 940\n",
      "00:39:04 [DEBUG] train episode 46: reward = -20.00, steps = 1078\n",
      "00:39:35 [DEBUG] train episode 47: reward = -21.00, steps = 791\n",
      "00:40:12 [DEBUG] train episode 48: reward = -21.00, steps = 1002\n",
      "00:40:44 [DEBUG] train episode 49: reward = -21.00, steps = 843\n",
      "00:41:17 [DEBUG] train episode 50: reward = -20.00, steps = 891\n",
      "00:41:53 [DEBUG] train episode 51: reward = -20.00, steps = 962\n",
      "00:42:23 [DEBUG] train episode 52: reward = -21.00, steps = 787\n",
      "00:42:58 [DEBUG] train episode 53: reward = -21.00, steps = 952\n",
      "00:43:33 [DEBUG] train episode 54: reward = -20.00, steps = 924\n",
      "00:44:08 [DEBUG] train episode 55: reward = -20.00, steps = 919\n",
      "00:44:42 [DEBUG] train episode 56: reward = -19.00, steps = 914\n",
      "00:45:20 [DEBUG] train episode 57: reward = -18.00, steps = 1031\n",
      "00:45:57 [DEBUG] train episode 58: reward = -20.00, steps = 957\n",
      "00:46:34 [DEBUG] train episode 59: reward = -21.00, steps = 966\n",
      "00:47:02 [DEBUG] train episode 60: reward = -21.00, steps = 762\n",
      "00:47:37 [DEBUG] train episode 61: reward = -21.00, steps = 910\n",
      "00:48:16 [DEBUG] train episode 62: reward = -20.00, steps = 1038\n",
      "00:48:58 [DEBUG] train episode 63: reward = -19.00, steps = 1098\n",
      "00:49:33 [DEBUG] train episode 64: reward = -20.00, steps = 934\n",
      "00:50:04 [DEBUG] train episode 65: reward = -21.00, steps = 820\n",
      "00:50:38 [DEBUG] train episode 66: reward = -20.00, steps = 898\n",
      "00:51:12 [DEBUG] train episode 67: reward = -20.00, steps = 903\n",
      "00:51:45 [DEBUG] train episode 68: reward = -20.00, steps = 897\n",
      "00:52:20 [DEBUG] train episode 69: reward = -21.00, steps = 900\n",
      "00:52:59 [DEBUG] train episode 70: reward = -20.00, steps = 961\n",
      "00:53:40 [DEBUG] train episode 71: reward = -21.00, steps = 966\n",
      "00:54:17 [DEBUG] train episode 72: reward = -21.00, steps = 898\n",
      "00:54:49 [DEBUG] train episode 73: reward = -21.00, steps = 787\n",
      "00:55:21 [DEBUG] train episode 74: reward = -21.00, steps = 838\n",
      "00:56:06 [DEBUG] train episode 75: reward = -19.00, steps = 1158\n",
      "00:56:57 [DEBUG] train episode 76: reward = -20.00, steps = 885\n",
      "00:57:50 [DEBUG] train episode 77: reward = -20.00, steps = 915\n",
      "00:58:18 [DEBUG] train episode 78: reward = -21.00, steps = 787\n",
      "00:58:53 [DEBUG] train episode 79: reward = -19.00, steps = 960\n",
      "00:59:24 [DEBUG] train episode 80: reward = -21.00, steps = 845\n",
      "00:59:56 [DEBUG] train episode 81: reward = -20.00, steps = 895\n",
      "01:00:31 [DEBUG] train episode 82: reward = -20.00, steps = 963\n",
      "01:01:06 [DEBUG] train episode 83: reward = -21.00, steps = 817\n",
      "01:01:40 [DEBUG] train episode 84: reward = -21.00, steps = 821\n",
      "01:02:22 [DEBUG] train episode 85: reward = -20.00, steps = 1000\n",
      "01:03:01 [DEBUG] train episode 86: reward = -20.00, steps = 918\n",
      "01:03:50 [DEBUG] train episode 87: reward = -19.00, steps = 1178\n",
      "01:04:27 [DEBUG] train episode 88: reward = -20.00, steps = 887\n",
      "01:05:03 [DEBUG] train episode 89: reward = -21.00, steps = 853\n",
      "01:05:37 [DEBUG] train episode 90: reward = -21.00, steps = 806\n",
      "01:06:11 [DEBUG] train episode 91: reward = -21.00, steps = 817\n",
      "01:06:43 [DEBUG] train episode 92: reward = -21.00, steps = 759\n",
      "01:07:22 [DEBUG] train episode 93: reward = -19.00, steps = 945\n",
      "01:08:05 [DEBUG] train episode 94: reward = -18.00, steps = 1033\n",
      "01:08:56 [DEBUG] train episode 95: reward = -17.00, steps = 1226\n",
      "01:09:28 [DEBUG] train episode 96: reward = -21.00, steps = 763\n",
      "01:10:09 [DEBUG] train episode 97: reward = -19.00, steps = 971\n",
      "01:10:41 [DEBUG] train episode 98: reward = -21.00, steps = 759\n",
      "01:11:26 [DEBUG] train episode 99: reward = -19.00, steps = 1069\n",
      "01:11:57 [DEBUG] train episode 100: reward = -21.00, steps = 760\n",
      "01:12:34 [DEBUG] train episode 101: reward = -21.00, steps = 877\n",
      "01:13:16 [DEBUG] train episode 102: reward = -20.00, steps = 988\n",
      "01:13:58 [DEBUG] train episode 103: reward = -18.00, steps = 1014\n",
      "01:14:33 [DEBUG] train episode 104: reward = -21.00, steps = 843\n",
      "01:15:06 [DEBUG] train episode 105: reward = -21.00, steps = 790\n",
      "01:15:43 [DEBUG] train episode 106: reward = -21.00, steps = 879\n",
      "01:16:34 [DEBUG] train episode 107: reward = -19.00, steps = 1125\n",
      "01:20:09 [DEBUG] train episode 108: reward = -20.00, steps = 964\n",
      "01:23:09 [DEBUG] train episode 109: reward = -21.00, steps = 787\n",
      "01:26:24 [DEBUG] train episode 110: reward = -21.00, steps = 878\n",
      "01:29:32 [DEBUG] train episode 111: reward = -21.00, steps = 837\n",
      "01:32:55 [DEBUG] train episode 112: reward = -20.00, steps = 915\n",
      "01:36:04 [DEBUG] train episode 113: reward = -21.00, steps = 850\n",
      "01:40:11 [DEBUG] train episode 114: reward = -18.00, steps = 1117\n",
      "01:44:21 [DEBUG] train episode 115: reward = -18.00, steps = 1127\n",
      "01:47:46 [DEBUG] train episode 116: reward = -20.00, steps = 929\n",
      "01:51:09 [DEBUG] train episode 117: reward = -21.00, steps = 912\n",
      "01:55:19 [DEBUG] train episode 118: reward = -19.00, steps = 1130\n",
      "01:59:33 [DEBUG] train episode 119: reward = -19.00, steps = 1121\n",
      "02:03:53 [DEBUG] train episode 120: reward = -18.00, steps = 1163\n",
      "02:08:02 [DEBUG] train episode 121: reward = -19.00, steps = 1122\n",
      "02:12:25 [DEBUG] train episode 122: reward = -20.00, steps = 1191\n",
      "02:16:20 [DEBUG] train episode 123: reward = -20.00, steps = 1067\n",
      "02:20:55 [DEBUG] train episode 124: reward = -18.00, steps = 1240\n",
      "02:24:46 [DEBUG] train episode 125: reward = -20.00, steps = 1019\n",
      "02:29:14 [DEBUG] train episode 126: reward = -20.00, steps = 1195\n",
      "02:34:49 [DEBUG] train episode 127: reward = -15.00, steps = 1510\n",
      "02:39:49 [DEBUG] train episode 128: reward = -19.00, steps = 1352\n",
      "02:44:46 [DEBUG] train episode 129: reward = -21.00, steps = 1339\n",
      "02:50:17 [DEBUG] train episode 130: reward = -18.00, steps = 1484\n",
      "02:55:05 [DEBUG] train episode 131: reward = -21.00, steps = 1293\n",
      "02:58:54 [DEBUG] train episode 132: reward = -21.00, steps = 1027\n",
      "03:04:11 [DEBUG] train episode 133: reward = -17.00, steps = 1420\n",
      "03:09:12 [DEBUG] train episode 134: reward = -19.00, steps = 1355\n",
      "03:13:47 [DEBUG] train episode 135: reward = -20.00, steps = 1238\n",
      "03:19:13 [DEBUG] train episode 136: reward = -18.00, steps = 1463\n",
      "03:25:22 [DEBUG] train episode 137: reward = -18.00, steps = 1642\n",
      "03:29:59 [DEBUG] train episode 138: reward = -20.00, steps = 1219\n",
      "03:35:57 [DEBUG] train episode 139: reward = -18.00, steps = 1550\n",
      "03:41:10 [DEBUG] train episode 140: reward = -17.00, steps = 1392\n",
      "03:45:14 [DEBUG] train episode 141: reward = -21.00, steps = 1077\n",
      "03:50:26 [DEBUG] train episode 142: reward = -17.00, steps = 1381\n",
      "03:56:09 [DEBUG] train episode 143: reward = -18.00, steps = 1529\n",
      "04:00:38 [DEBUG] train episode 144: reward = -20.00, steps = 1186\n",
      "04:05:25 [DEBUG] train episode 145: reward = -18.00, steps = 1271\n",
      "04:10:04 [DEBUG] train episode 146: reward = -18.00, steps = 1233\n",
      "04:16:20 [DEBUG] train episode 147: reward = -16.00, steps = 1644\n",
      "04:23:44 [DEBUG] train episode 148: reward = -13.00, steps = 1925\n",
      "04:30:03 [DEBUG] train episode 149: reward = -17.00, steps = 1643\n",
      "04:35:31 [DEBUG] train episode 150: reward = -15.00, steps = 1420\n",
      "04:40:18 [DEBUG] train episode 151: reward = -19.00, steps = 1241\n",
      "04:45:29 [DEBUG] train episode 152: reward = -19.00, steps = 1338\n",
      "04:52:19 [DEBUG] train episode 153: reward = -13.00, steps = 1742\n",
      "04:59:43 [DEBUG] train episode 154: reward = -15.00, steps = 1858\n",
      "05:05:21 [DEBUG] train episode 155: reward = -19.00, steps = 1414\n",
      "05:13:24 [DEBUG] train episode 156: reward = -12.00, steps = 2022\n",
      "05:19:26 [DEBUG] train episode 157: reward = -16.00, steps = 1492\n",
      "05:24:53 [DEBUG] train episode 158: reward = -17.00, steps = 1346\n",
      "05:33:08 [DEBUG] train episode 159: reward = -14.00, steps = 2022\n",
      "05:39:29 [DEBUG] train episode 160: reward = -17.00, steps = 1535\n",
      "05:47:02 [DEBUG] train episode 161: reward = -13.00, steps = 1816\n",
      "05:54:57 [DEBUG] train episode 162: reward = -13.00, steps = 1887\n",
      "06:03:11 [DEBUG] train episode 163: reward = -13.00, steps = 1948\n",
      "06:10:15 [DEBUG] train episode 164: reward = -15.00, steps = 1669\n",
      "06:16:39 [DEBUG] train episode 165: reward = -15.00, steps = 1506\n",
      "06:23:53 [DEBUG] train episode 166: reward = -14.00, steps = 1684\n",
      "06:31:39 [DEBUG] train episode 167: reward = -14.00, steps = 1818\n",
      "06:37:56 [DEBUG] train episode 168: reward = -16.00, steps = 1467\n",
      "06:47:44 [DEBUG] train episode 169: reward = -9.00, steps = 2276\n",
      "06:56:16 [DEBUG] train episode 170: reward = -11.00, steps = 1974\n",
      "07:02:48 [DEBUG] train episode 171: reward = -14.00, steps = 1503\n",
      "07:09:14 [DEBUG] train episode 172: reward = -17.00, steps = 1481\n",
      "07:15:40 [DEBUG] train episode 173: reward = -15.00, steps = 1477\n",
      "07:22:08 [DEBUG] train episode 174: reward = -14.00, steps = 1471\n",
      "07:28:05 [DEBUG] train episode 175: reward = -17.00, steps = 1355\n",
      "07:36:09 [DEBUG] train episode 176: reward = -13.00, steps = 1835\n",
      "07:44:25 [DEBUG] train episode 177: reward = -12.00, steps = 1865\n",
      "07:51:34 [DEBUG] train episode 178: reward = -14.00, steps = 1618\n",
      "07:59:07 [DEBUG] train episode 179: reward = -14.00, steps = 1694\n",
      "08:05:44 [DEBUG] train episode 180: reward = -15.00, steps = 1487\n",
      "08:14:07 [DEBUG] train episode 181: reward = -12.00, steps = 1883\n",
      "08:20:32 [DEBUG] train episode 182: reward = -16.00, steps = 1429\n",
      "08:27:26 [DEBUG] train episode 183: reward = -15.00, steps = 1532\n",
      "08:35:47 [DEBUG] train episode 184: reward = -10.00, steps = 1868\n",
      "08:43:22 [DEBUG] train episode 185: reward = -14.00, steps = 1693\n",
      "08:49:15 [DEBUG] train episode 186: reward = -16.00, steps = 1303\n",
      "08:53:57 [DEBUG] train episode 187: reward = -19.00, steps = 1045\n",
      "08:59:23 [DEBUG] train episode 188: reward = -16.00, steps = 1205\n",
      "09:04:50 [DEBUG] train episode 189: reward = -17.00, steps = 1203\n",
      "09:11:36 [DEBUG] train episode 190: reward = -14.00, steps = 1490\n",
      "09:20:13 [DEBUG] train episode 191: reward = -10.00, steps = 1907\n",
      "09:28:04 [DEBUG] train episode 192: reward = -13.00, steps = 1715\n",
      "09:36:09 [DEBUG] train episode 193: reward = -14.00, steps = 1651\n",
      "09:43:19 [DEBUG] train episode 194: reward = -16.00, steps = 1458\n",
      "09:47:09 [DEBUG] train episode 195: reward = -20.00, steps = 838\n",
      "09:53:26 [DEBUG] train episode 196: reward = -17.00, steps = 1304\n",
      "10:01:54 [DEBUG] train episode 197: reward = -12.00, steps = 1823\n",
      "10:10:05 [DEBUG] train episode 198: reward = -12.00, steps = 1726\n",
      "10:16:34 [DEBUG] train episode 199: reward = -15.00, steps = 1327\n",
      "10:24:02 [DEBUG] train episode 200: reward = -13.00, steps = 1570\n",
      "10:31:48 [DEBUG] train episode 201: reward = -13.00, steps = 1663\n",
      "10:39:38 [DEBUG] train episode 202: reward = -11.00, steps = 1703\n",
      "10:48:01 [DEBUG] train episode 203: reward = -10.00, steps = 1821\n",
      "10:53:47 [DEBUG] train episode 204: reward = -16.00, steps = 1255\n",
      "11:02:59 [DEBUG] train episode 205: reward = -11.00, steps = 1982\n",
      "11:08:00 [DEBUG] train episode 206: reward = -20.00, steps = 1093\n",
      "11:16:26 [DEBUG] train episode 207: reward = -12.00, steps = 1833\n",
      "11:22:32 [DEBUG] train episode 208: reward = -15.00, steps = 1310\n",
      "11:32:12 [DEBUG] train episode 209: reward = -8.00, steps = 2087\n",
      "11:40:08 [DEBUG] train episode 210: reward = -14.00, steps = 1713\n",
      "11:49:32 [DEBUG] train episode 211: reward = -9.00, steps = 2017\n",
      "11:55:52 [DEBUG] train episode 212: reward = -16.00, steps = 1364\n",
      "12:04:21 [DEBUG] train episode 213: reward = -11.00, steps = 1822\n",
      "12:12:44 [DEBUG] train episode 214: reward = -13.00, steps = 1809\n",
      "12:18:58 [DEBUG] train episode 215: reward = -17.00, steps = 1346\n",
      "12:27:58 [DEBUG] train episode 216: reward = -9.00, steps = 1931\n",
      "12:35:01 [DEBUG] train episode 217: reward = -14.00, steps = 1515\n",
      "12:44:35 [DEBUG] train episode 218: reward = -9.00, steps = 2069\n",
      "12:53:30 [DEBUG] train episode 219: reward = -10.00, steps = 1911\n",
      "13:00:16 [DEBUG] train episode 220: reward = -17.00, steps = 1440\n",
      "13:07:43 [DEBUG] train episode 221: reward = -14.00, steps = 1596\n",
      "13:16:45 [DEBUG] train episode 222: reward = -8.00, steps = 1968\n",
      "13:26:51 [DEBUG] train episode 223: reward = -6.00, steps = 2189\n",
      "13:33:45 [DEBUG] train episode 224: reward = -16.00, steps = 1500\n",
      "13:41:11 [DEBUG] train episode 225: reward = -13.00, steps = 1616\n",
      "13:49:41 [DEBUG] train episode 226: reward = -11.00, steps = 1762\n",
      "13:58:18 [DEBUG] train episode 227: reward = -13.00, steps = 1659\n",
      "14:05:49 [DEBUG] train episode 228: reward = -15.00, steps = 1548\n",
      "14:12:26 [DEBUG] train episode 229: reward = -17.00, steps = 1298\n",
      "14:21:20 [DEBUG] train episode 230: reward = -10.00, steps = 1834\n",
      "14:31:20 [DEBUG] train episode 231: reward = -11.00, steps = 2003\n",
      "14:42:13 [DEBUG] train episode 232: reward = -11.00, steps = 1901\n",
      "14:49:03 [DEBUG] train episode 233: reward = -16.00, steps = 1394\n",
      "14:58:52 [DEBUG] train episode 234: reward = -12.00, steps = 1796\n",
      "15:07:55 [DEBUG] train episode 235: reward = -13.00, steps = 1600\n",
      "15:14:55 [DEBUG] train episode 236: reward = -15.00, steps = 1385\n",
      "15:23:54 [DEBUG] train episode 237: reward = -12.00, steps = 1703\n",
      "15:32:23 [DEBUG] train episode 238: reward = -11.00, steps = 1718\n",
      "15:38:06 [DEBUG] train episode 239: reward = -20.00, steps = 1077\n",
      "15:43:43 [DEBUG] train episode 240: reward = -19.00, steps = 1125\n",
      "15:50:04 [DEBUG] train episode 241: reward = -16.00, steps = 1372\n",
      "15:58:15 [DEBUG] train episode 242: reward = -11.00, steps = 1781\n",
      "16:06:06 [DEBUG] train episode 243: reward = -12.00, steps = 1711\n",
      "16:12:44 [DEBUG] train episode 244: reward = -16.00, steps = 1436\n",
      "16:19:25 [DEBUG] train episode 245: reward = -16.00, steps = 1428\n",
      "16:26:55 [DEBUG] train episode 246: reward = -17.00, steps = 1368\n",
      "16:35:48 [DEBUG] train episode 247: reward = -13.00, steps = 1610\n",
      "16:43:19 [DEBUG] train episode 248: reward = -16.00, steps = 1345\n",
      "16:49:23 [DEBUG] train episode 249: reward = -17.00, steps = 1164\n",
      "16:54:57 [DEBUG] train episode 250: reward = -18.00, steps = 1176\n",
      "17:02:05 [DEBUG] train episode 251: reward = -13.00, steps = 1489\n",
      "17:09:09 [DEBUG] train episode 252: reward = -16.00, steps = 1463\n",
      "17:15:51 [DEBUG] train episode 253: reward = -18.00, steps = 1325\n",
      "17:22:18 [DEBUG] train episode 254: reward = -17.00, steps = 1343\n",
      "17:28:44 [DEBUG] train episode 255: reward = -16.00, steps = 1284\n",
      "17:35:59 [DEBUG] train episode 256: reward = -17.00, steps = 1288\n",
      "17:43:46 [DEBUG] train episode 257: reward = -14.00, steps = 1485\n",
      "17:52:44 [DEBUG] train episode 258: reward = -10.00, steps = 1753\n",
      "18:00:00 [DEBUG] train episode 259: reward = -15.00, steps = 1423\n",
      "18:07:25 [DEBUG] train episode 260: reward = -13.00, steps = 1562\n",
      "18:14:34 [DEBUG] train episode 261: reward = -14.00, steps = 1534\n",
      "18:20:42 [DEBUG] train episode 262: reward = -16.00, steps = 1294\n",
      "18:27:36 [DEBUG] train episode 263: reward = -14.00, steps = 1476\n",
      "18:35:11 [DEBUG] train episode 264: reward = -13.00, steps = 1621\n",
      "18:42:30 [DEBUG] train episode 265: reward = -13.00, steps = 1585\n",
      "18:49:48 [DEBUG] train episode 266: reward = -13.00, steps = 1574\n",
      "18:58:02 [DEBUG] train episode 267: reward = -11.00, steps = 1785\n",
      "19:05:10 [DEBUG] train episode 268: reward = -13.00, steps = 1539\n",
      "19:11:35 [DEBUG] train episode 269: reward = -15.00, steps = 1330\n",
      "19:18:51 [DEBUG] train episode 270: reward = -13.00, steps = 1566\n",
      "19:27:32 [DEBUG] train episode 271: reward = -12.00, steps = 1832\n",
      "19:36:16 [DEBUG] train episode 272: reward = -12.00, steps = 1853\n",
      "19:43:48 [DEBUG] train episode 273: reward = -14.00, steps = 1635\n",
      "19:51:56 [DEBUG] train episode 274: reward = -10.00, steps = 1773\n",
      "19:57:46 [DEBUG] train episode 275: reward = -17.00, steps = 1273\n",
      "20:03:22 [DEBUG] train episode 276: reward = -16.00, steps = 1217\n",
      "20:09:44 [DEBUG] train episode 277: reward = -16.00, steps = 1387\n",
      "20:15:59 [DEBUG] train episode 278: reward = -16.00, steps = 1364\n",
      "20:24:06 [DEBUG] train episode 279: reward = -11.00, steps = 1747\n",
      "20:30:33 [DEBUG] train episode 280: reward = -14.00, steps = 1389\n",
      "20:36:15 [DEBUG] train episode 281: reward = -17.00, steps = 1234\n",
      "20:41:58 [DEBUG] train episode 282: reward = -17.00, steps = 1239\n",
      "20:48:29 [DEBUG] train episode 283: reward = -15.00, steps = 1410\n",
      "20:55:38 [DEBUG] train episode 284: reward = -13.00, steps = 1552\n",
      "21:01:20 [DEBUG] train episode 285: reward = -16.00, steps = 1229\n",
      "21:08:27 [DEBUG] train episode 286: reward = -13.00, steps = 1535\n",
      "21:14:19 [DEBUG] train episode 287: reward = -16.00, steps = 1268\n",
      "21:22:45 [DEBUG] train episode 288: reward = -13.00, steps = 1661\n",
      "21:29:53 [DEBUG] train episode 289: reward = -16.00, steps = 1391\n",
      "21:37:16 [DEBUG] train episode 290: reward = -19.00, steps = 1290\n",
      "21:44:27 [DEBUG] train episode 291: reward = -14.00, steps = 1475\n",
      "21:51:57 [DEBUG] train episode 292: reward = -14.00, steps = 1581\n",
      "21:58:06 [DEBUG] train episode 293: reward = -17.00, steps = 1248\n",
      "22:05:22 [DEBUG] train episode 294: reward = -16.00, steps = 1309\n",
      "22:11:32 [DEBUG] train episode 295: reward = -17.00, steps = 1236\n",
      "22:17:35 [DEBUG] train episode 296: reward = -17.00, steps = 1222\n",
      "22:23:05 [DEBUG] train episode 297: reward = -19.00, steps = 1112\n",
      "22:31:53 [DEBUG] train episode 298: reward = -12.00, steps = 1672\n",
      "22:39:14 [DEBUG] train episode 299: reward = -14.00, steps = 1444\n",
      "22:47:48 [DEBUG] train episode 300: reward = -13.00, steps = 1668\n",
      "22:55:44 [DEBUG] train episode 301: reward = -13.00, steps = 1588\n",
      "23:03:20 [DEBUG] train episode 302: reward = -14.00, steps = 1533\n",
      "23:11:49 [DEBUG] train episode 303: reward = -11.00, steps = 1819\n",
      "23:19:26 [DEBUG] train episode 304: reward = -13.00, steps = 1627\n",
      "23:25:23 [DEBUG] train episode 305: reward = -17.00, steps = 1236\n",
      "23:31:44 [DEBUG] train episode 306: reward = -16.00, steps = 1281\n",
      "23:38:44 [DEBUG] train episode 307: reward = -16.00, steps = 1312\n",
      "23:43:46 [DEBUG] train episode 308: reward = -18.00, steps = 1032\n",
      "23:52:15 [DEBUG] train episode 309: reward = -12.00, steps = 1724\n",
      "23:59:27 [DEBUG] train episode 310: reward = -13.00, steps = 1472\n",
      "00:06:31 [DEBUG] train episode 311: reward = -14.00, steps = 1499\n",
      "00:12:20 [DEBUG] train episode 312: reward = -17.00, steps = 1232\n",
      "00:18:19 [DEBUG] train episode 313: reward = -17.00, steps = 1265\n",
      "00:25:30 [DEBUG] train episode 314: reward = -16.00, steps = 1468\n",
      "00:31:19 [DEBUG] train episode 315: reward = -17.00, steps = 1252\n",
      "00:36:15 [DEBUG] train episode 316: reward = -18.00, steps = 1043\n",
      "00:44:12 [DEBUG] train episode 317: reward = -12.00, steps = 1639\n",
      "00:51:55 [DEBUG] train episode 318: reward = -11.00, steps = 1705\n",
      "00:58:13 [DEBUG] train episode 319: reward = -16.00, steps = 1393\n",
      "01:03:37 [DEBUG] train episode 320: reward = -17.00, steps = 1197\n",
      "01:10:46 [DEBUG] train episode 321: reward = -15.00, steps = 1578\n",
      "01:15:36 [DEBUG] train episode 322: reward = -18.00, steps = 1076\n",
      "01:21:47 [DEBUG] train episode 323: reward = -16.00, steps = 1358\n",
      "01:28:12 [DEBUG] train episode 324: reward = -15.00, steps = 1422\n",
      "01:34:23 [DEBUG] train episode 325: reward = -17.00, steps = 1368\n",
      "01:39:13 [DEBUG] train episode 326: reward = -19.00, steps = 1073\n",
      "01:46:17 [DEBUG] train episode 327: reward = -15.00, steps = 1572\n",
      "01:51:31 [DEBUG] train episode 328: reward = -18.00, steps = 1164\n",
      "01:58:04 [DEBUG] train episode 329: reward = -16.00, steps = 1457\n",
      "02:03:43 [DEBUG] train episode 330: reward = -16.00, steps = 1248\n",
      "02:10:23 [DEBUG] train episode 331: reward = -15.00, steps = 1477\n",
      "02:15:36 [DEBUG] train episode 332: reward = -17.00, steps = 1154\n",
      "02:22:47 [DEBUG] train episode 333: reward = -14.00, steps = 1577\n",
      "02:29:06 [DEBUG] train episode 334: reward = -16.00, steps = 1397\n",
      "02:34:17 [DEBUG] train episode 335: reward = -19.00, steps = 1141\n",
      "02:40:42 [DEBUG] train episode 336: reward = -15.00, steps = 1415\n",
      "02:47:49 [DEBUG] train episode 337: reward = -13.00, steps = 1563\n",
      "02:55:22 [DEBUG] train episode 338: reward = -13.00, steps = 1664\n",
      "03:00:32 [DEBUG] train episode 339: reward = -18.00, steps = 1142\n",
      "03:07:55 [DEBUG] train episode 340: reward = -12.00, steps = 1637\n",
      "03:13:12 [DEBUG] train episode 341: reward = -18.00, steps = 1169\n",
      "03:18:48 [DEBUG] train episode 342: reward = -16.00, steps = 1239\n",
      "03:24:55 [DEBUG] train episode 343: reward = -16.00, steps = 1302\n",
      "03:31:51 [DEBUG] train episode 344: reward = -15.00, steps = 1452\n",
      "03:36:18 [DEBUG] train episode 345: reward = -19.00, steps = 978\n",
      "03:41:09 [DEBUG] train episode 346: reward = -18.00, steps = 1066\n",
      "03:46:51 [DEBUG] train episode 347: reward = -16.00, steps = 1249\n",
      "03:52:42 [DEBUG] train episode 348: reward = -16.00, steps = 1285\n",
      "03:58:38 [DEBUG] train episode 349: reward = -17.00, steps = 1298\n",
      "04:03:33 [DEBUG] train episode 350: reward = -20.00, steps = 1081\n",
      "04:09:28 [DEBUG] train episode 351: reward = -17.00, steps = 1294\n",
      "04:14:32 [DEBUG] train episode 352: reward = -18.00, steps = 1110\n",
      "04:20:20 [DEBUG] train episode 353: reward = -16.00, steps = 1267\n",
      "04:26:18 [DEBUG] train episode 354: reward = -16.00, steps = 1288\n",
      "04:31:24 [DEBUG] train episode 355: reward = -18.00, steps = 1115\n",
      "04:36:30 [DEBUG] train episode 356: reward = -18.00, steps = 1111\n",
      "04:42:43 [DEBUG] train episode 357: reward = -17.00, steps = 1357\n",
      "04:49:13 [DEBUG] train episode 358: reward = -15.00, steps = 1422\n",
      "04:54:53 [DEBUG] train episode 359: reward = -17.00, steps = 1243\n",
      "05:00:29 [DEBUG] train episode 360: reward = -17.00, steps = 1225\n",
      "05:05:11 [DEBUG] train episode 361: reward = -18.00, steps = 1030\n",
      "05:10:09 [DEBUG] train episode 362: reward = -18.00, steps = 1085\n",
      "05:16:48 [DEBUG] train episode 363: reward = -15.00, steps = 1461\n",
      "05:21:58 [DEBUG] train episode 364: reward = -17.00, steps = 1123\n",
      "05:27:25 [DEBUG] train episode 365: reward = -18.00, steps = 1196\n",
      "05:32:26 [DEBUG] train episode 366: reward = -18.00, steps = 1096\n",
      "05:38:58 [DEBUG] train episode 367: reward = -16.00, steps = 1440\n",
      "05:44:49 [DEBUG] train episode 368: reward = -18.00, steps = 1286\n",
      "05:48:48 [DEBUG] train episode 369: reward = -21.00, steps = 874\n",
      "05:55:22 [DEBUG] train episode 370: reward = -15.00, steps = 1440\n",
      "06:00:25 [DEBUG] train episode 371: reward = -19.00, steps = 1105\n",
      "06:05:33 [DEBUG] train episode 372: reward = -18.00, steps = 1128\n",
      "06:12:23 [DEBUG] train episode 373: reward = -15.00, steps = 1499\n",
      "06:16:50 [DEBUG] train episode 374: reward = -20.00, steps = 978\n",
      "06:24:32 [DEBUG] train episode 375: reward = -13.00, steps = 1688\n",
      "06:31:59 [DEBUG] train episode 376: reward = -14.00, steps = 1630\n",
      "06:38:25 [DEBUG] train episode 377: reward = -17.00, steps = 1411\n",
      "06:45:26 [DEBUG] train episode 378: reward = -14.00, steps = 1544\n",
      "06:51:11 [DEBUG] train episode 379: reward = -16.00, steps = 1264\n",
      "06:57:41 [DEBUG] train episode 380: reward = -15.00, steps = 1424\n",
      "07:04:57 [DEBUG] train episode 381: reward = -15.00, steps = 1599\n",
      "07:11:02 [DEBUG] train episode 382: reward = -16.00, steps = 1333\n",
      "07:17:22 [DEBUG] train episode 383: reward = -15.00, steps = 1394\n",
      "07:22:52 [DEBUG] train episode 384: reward = -17.00, steps = 1192\n",
      "07:29:39 [DEBUG] train episode 385: reward = -15.00, steps = 1467\n",
      "07:38:30 [DEBUG] train episode 386: reward = -10.00, steps = 1930\n",
      "07:44:24 [DEBUG] train episode 387: reward = -15.00, steps = 1290\n",
      "07:50:58 [DEBUG] train episode 388: reward = -17.00, steps = 1441\n",
      "07:58:49 [DEBUG] train episode 389: reward = -16.00, steps = 1718\n",
      "08:03:12 [DEBUG] train episode 390: reward = -20.00, steps = 956\n",
      "08:10:57 [DEBUG] train episode 391: reward = -12.00, steps = 1686\n",
      "08:17:07 [DEBUG] train episode 392: reward = -17.00, steps = 1337\n",
      "08:22:23 [DEBUG] train episode 393: reward = -20.00, steps = 1137\n",
      "08:28:31 [DEBUG] train episode 394: reward = -16.00, steps = 1329\n",
      "08:33:16 [DEBUG] train episode 395: reward = -18.00, steps = 1029\n",
      "08:39:33 [DEBUG] train episode 396: reward = -15.00, steps = 1375\n",
      "08:44:34 [DEBUG] train episode 397: reward = -18.00, steps = 1097\n",
      "08:51:55 [DEBUG] train episode 398: reward = -15.00, steps = 1611\n",
      "08:57:41 [DEBUG] train episode 399: reward = -17.00, steps = 1260\n",
      "09:03:23 [DEBUG] train episode 400: reward = -17.00, steps = 1240\n",
      "09:08:11 [DEBUG] train episode 401: reward = -19.00, steps = 1047\n",
      "09:13:23 [DEBUG] train episode 402: reward = -18.00, steps = 1133\n",
      "09:20:14 [DEBUG] train episode 403: reward = -15.00, steps = 1495\n",
      "09:24:35 [DEBUG] train episode 404: reward = -20.00, steps = 925\n",
      "09:30:30 [DEBUG] train episode 405: reward = -16.00, steps = 1281\n",
      "09:37:41 [DEBUG] train episode 406: reward = -15.00, steps = 1561\n",
      "09:45:06 [DEBUG] train episode 407: reward = -16.00, steps = 1606\n",
      "09:51:06 [DEBUG] train episode 408: reward = -17.00, steps = 1300\n",
      "09:57:27 [DEBUG] train episode 409: reward = -16.00, steps = 1374\n",
      "10:04:43 [DEBUG] train episode 410: reward = -16.00, steps = 1277\n",
      "10:09:54 [DEBUG] train episode 411: reward = -19.00, steps = 960\n",
      "10:15:56 [DEBUG] train episode 412: reward = -17.00, steps = 1143\n",
      "10:23:33 [DEBUG] train episode 413: reward = -15.00, steps = 1379\n",
      "10:30:01 [DEBUG] train episode 414: reward = -16.00, steps = 1313\n",
      "10:40:39 [DEBUG] train episode 415: reward = -11.00, steps = 1811\n",
      "10:48:12 [DEBUG] train episode 416: reward = -15.00, steps = 1437\n",
      "10:55:13 [DEBUG] train episode 417: reward = -16.00, steps = 1302\n",
      "11:03:32 [DEBUG] train episode 418: reward = -15.00, steps = 1526\n",
      "11:12:06 [DEBUG] train episode 419: reward = -13.00, steps = 1606\n",
      "11:17:52 [DEBUG] train episode 420: reward = -17.00, steps = 1192\n",
      "11:24:10 [DEBUG] train episode 421: reward = -15.00, steps = 1316\n",
      "11:31:27 [DEBUG] train episode 422: reward = -15.00, steps = 1536\n",
      "11:39:02 [DEBUG] train episode 423: reward = -14.00, steps = 1527\n",
      "11:43:33 [DEBUG] train episode 424: reward = -20.00, steps = 947\n",
      "11:51:19 [DEBUG] train episode 425: reward = -14.00, steps = 1598\n",
      "11:59:16 [DEBUG] train episode 426: reward = -14.00, steps = 1623\n",
      "12:04:19 [DEBUG] train episode 427: reward = -20.00, steps = 916\n",
      "12:10:06 [DEBUG] train episode 428: reward = -17.00, steps = 1212\n",
      "12:16:00 [DEBUG] train episode 429: reward = -17.00, steps = 1261\n",
      "12:22:56 [DEBUG] train episode 430: reward = -16.00, steps = 1460\n",
      "12:29:28 [DEBUG] train episode 431: reward = -16.00, steps = 1384\n",
      "12:33:57 [DEBUG] train episode 432: reward = -20.00, steps = 952\n",
      "12:38:49 [DEBUG] train episode 433: reward = -19.00, steps = 1039\n",
      "12:44:37 [DEBUG] train episode 434: reward = -18.00, steps = 1219\n",
      "12:51:33 [DEBUG] train episode 435: reward = -15.00, steps = 1472\n",
      "12:57:55 [DEBUG] train episode 436: reward = -15.00, steps = 1359\n",
      "13:03:52 [DEBUG] train episode 437: reward = -16.00, steps = 1271\n",
      "13:08:05 [DEBUG] train episode 438: reward = -20.00, steps = 897\n",
      "13:12:11 [DEBUG] train episode 439: reward = -21.00, steps = 878\n",
      "13:18:43 [DEBUG] train episode 440: reward = -16.00, steps = 1397\n",
      "13:25:31 [DEBUG] train episode 441: reward = -16.00, steps = 1432\n",
      "13:33:20 [DEBUG] train episode 442: reward = -13.00, steps = 1601\n",
      "13:41:04 [DEBUG] train episode 443: reward = -13.00, steps = 1633\n",
      "13:49:27 [DEBUG] train episode 444: reward = -13.00, steps = 1780\n",
      "13:55:18 [DEBUG] train episode 445: reward = -17.00, steps = 1239\n",
      "14:03:12 [DEBUG] train episode 446: reward = -12.00, steps = 1679\n",
      "14:09:31 [DEBUG] train episode 447: reward = -18.00, steps = 1178\n",
      "14:17:20 [DEBUG] train episode 448: reward = -16.00, steps = 1403\n",
      "14:25:12 [DEBUG] train episode 449: reward = -16.00, steps = 1419\n",
      "14:33:27 [DEBUG] train episode 450: reward = -14.00, steps = 1499\n",
      "14:40:07 [DEBUG] train episode 451: reward = -17.00, steps = 1197\n",
      "14:47:03 [DEBUG] train episode 452: reward = -17.00, steps = 1244\n",
      "14:53:35 [DEBUG] train episode 453: reward = -18.00, steps = 1175\n",
      "14:59:48 [DEBUG] train episode 454: reward = -16.00, steps = 1302\n",
      "15:06:20 [DEBUG] train episode 455: reward = -16.00, steps = 1386\n",
      "15:12:39 [DEBUG] train episode 456: reward = -15.00, steps = 1344\n",
      "15:18:07 [DEBUG] train episode 457: reward = -17.00, steps = 1167\n",
      "15:24:12 [DEBUG] train episode 458: reward = -16.00, steps = 1279\n",
      "15:30:08 [DEBUG] train episode 459: reward = -16.00, steps = 1256\n",
      "15:35:41 [DEBUG] train episode 460: reward = -17.00, steps = 1183\n",
      "15:41:14 [DEBUG] train episode 461: reward = -17.00, steps = 1181\n",
      "15:46:57 [DEBUG] train episode 462: reward = -16.00, steps = 1220\n",
      "15:53:31 [DEBUG] train episode 463: reward = -16.00, steps = 1398\n",
      "15:58:27 [DEBUG] train episode 464: reward = -19.00, steps = 1053\n",
      "16:03:11 [DEBUG] train episode 465: reward = -18.00, steps = 1010\n",
      "16:08:04 [DEBUG] train episode 466: reward = -19.00, steps = 1045\n",
      "16:12:55 [DEBUG] train episode 467: reward = -18.00, steps = 1033\n",
      "16:18:38 [DEBUG] train episode 468: reward = -16.00, steps = 1218\n",
      "16:25:11 [DEBUG] train episode 469: reward = -15.00, steps = 1381\n",
      "16:31:01 [DEBUG] train episode 470: reward = -17.00, steps = 1230\n",
      "16:37:17 [DEBUG] train episode 471: reward = -16.00, steps = 1328\n",
      "16:43:21 [DEBUG] train episode 472: reward = -16.00, steps = 1286\n",
      "16:49:45 [DEBUG] train episode 473: reward = -16.00, steps = 1372\n",
      "16:57:21 [DEBUG] train episode 474: reward = -13.00, steps = 1620\n",
      "17:03:17 [DEBUG] train episode 475: reward = -17.00, steps = 1201\n",
      "17:09:41 [DEBUG] train episode 476: reward = -16.00, steps = 1320\n",
      "17:15:10 [DEBUG] train episode 477: reward = -18.00, steps = 1091\n",
      "17:21:25 [DEBUG] train episode 478: reward = -18.00, steps = 1142\n",
      "17:25:46 [DEBUG] train episode 479: reward = -20.00, steps = 901\n",
      "17:32:36 [DEBUG] train episode 480: reward = -18.00, steps = 1144\n",
      "17:40:25 [DEBUG] train episode 481: reward = -15.00, steps = 1341\n",
      "17:47:03 [DEBUG] train episode 482: reward = -16.00, steps = 1256\n",
      "17:52:58 [DEBUG] train episode 483: reward = -19.00, steps = 1191\n",
      "18:01:01 [DEBUG] train episode 484: reward = -13.00, steps = 1625\n",
      "18:07:38 [DEBUG] train episode 485: reward = -16.00, steps = 1309\n",
      "18:13:16 [DEBUG] train episode 486: reward = -18.00, steps = 1145\n",
      "18:17:44 [DEBUG] train episode 487: reward = -21.00, steps = 764\n",
      "18:23:46 [DEBUG] train episode 488: reward = -19.00, steps = 1053\n",
      "18:31:09 [DEBUG] train episode 489: reward = -14.00, steps = 1438\n",
      "18:38:15 [DEBUG] train episode 490: reward = -16.00, steps = 1379\n",
      "18:43:37 [DEBUG] train episode 491: reward = -19.00, steps = 1061\n",
      "18:49:21 [DEBUG] train episode 492: reward = -19.00, steps = 1133\n",
      "18:56:31 [DEBUG] train episode 493: reward = -13.00, steps = 1416\n",
      "19:01:28 [DEBUG] train episode 494: reward = -19.00, steps = 993\n",
      "19:05:59 [DEBUG] train episode 495: reward = -21.00, steps = 903\n",
      "19:11:43 [DEBUG] train episode 496: reward = -18.00, steps = 1193\n",
      "19:16:39 [DEBUG] train episode 497: reward = -19.00, steps = 1038\n",
      "19:24:18 [DEBUG] train episode 498: reward = -13.00, steps = 1579\n",
      "19:32:55 [DEBUG] train episode 499: reward = -12.00, steps = 1706\n",
      "19:37:10 [DEBUG] train episode 500: reward = -20.00, steps = 865\n",
      "19:45:23 [DEBUG] train episode 501: reward = -11.00, steps = 1708\n",
      "19:49:48 [DEBUG] train episode 502: reward = -20.00, steps = 933\n",
      "19:56:19 [DEBUG] train episode 503: reward = -15.00, steps = 1364\n",
      "20:00:10 [DEBUG] train episode 504: reward = -21.00, steps = 786\n",
      "20:04:17 [DEBUG] train episode 505: reward = -20.00, steps = 835\n",
      "20:09:05 [DEBUG] train episode 506: reward = -20.00, steps = 1004\n",
      "20:13:48 [DEBUG] train episode 507: reward = -20.00, steps = 959\n",
      "20:20:24 [DEBUG] train episode 508: reward = -16.00, steps = 1356\n",
      "20:24:48 [DEBUG] train episode 509: reward = -20.00, steps = 905\n",
      "20:29:42 [DEBUG] train episode 510: reward = -21.00, steps = 1022\n",
      "20:33:56 [DEBUG] train episode 511: reward = -21.00, steps = 897\n",
      "20:39:10 [DEBUG] train episode 512: reward = -19.00, steps = 1109\n",
      "20:46:59 [DEBUG] train episode 513: reward = -12.00, steps = 1663\n",
      "20:52:03 [DEBUG] train episode 514: reward = -19.00, steps = 959\n",
      "21:00:36 [DEBUG] train episode 515: reward = -13.00, steps = 1530\n",
      "21:05:51 [DEBUG] train episode 516: reward = -20.00, steps = 921\n",
      "21:12:25 [DEBUG] train episode 517: reward = -19.00, steps = 1170\n",
      "21:19:18 [DEBUG] train episode 518: reward = -17.00, steps = 1189\n",
      "21:25:38 [DEBUG] train episode 519: reward = -17.00, steps = 1151\n",
      "21:30:53 [DEBUG] train episode 520: reward = -20.00, steps = 914\n",
      "21:36:55 [DEBUG] train episode 521: reward = -18.00, steps = 1125\n",
      "21:41:12 [DEBUG] train episode 522: reward = -21.00, steps = 852\n",
      "21:47:41 [DEBUG] train episode 523: reward = -21.00, steps = 943\n",
      "21:53:59 [DEBUG] train episode 524: reward = -21.00, steps = 965\n",
      "21:59:05 [DEBUG] train episode 525: reward = -21.00, steps = 912\n",
      "22:04:58 [DEBUG] train episode 526: reward = -19.00, steps = 1077\n",
      "22:11:58 [DEBUG] train episode 527: reward = -17.00, steps = 1311\n",
      "22:22:01 [DEBUG] train episode 528: reward = -9.00, steps = 1914\n",
      "22:28:18 [DEBUG] train episode 529: reward = -19.00, steps = 1133\n",
      "22:34:03 [DEBUG] train episode 530: reward = -19.00, steps = 1032\n",
      "22:39:21 [DEBUG] train episode 531: reward = -20.00, steps = 948\n",
      "22:48:05 [DEBUG] train episode 532: reward = -14.00, steps = 1489\n",
      "22:57:47 [DEBUG] train episode 533: reward = -10.00, steps = 1965\n",
      "23:03:40 [DEBUG] train episode 534: reward = -17.00, steps = 1237\n",
      "23:08:52 [DEBUG] train episode 535: reward = -18.00, steps = 1102\n",
      "23:16:23 [DEBUG] train episode 536: reward = -15.00, steps = 1436\n",
      "23:21:43 [DEBUG] train episode 537: reward = -19.00, steps = 1065\n",
      "23:29:00 [DEBUG] train episode 538: reward = -14.00, steps = 1542\n",
      "23:34:47 [DEBUG] train episode 539: reward = -16.00, steps = 1217\n",
      "23:39:31 [DEBUG] train episode 540: reward = -19.00, steps = 987\n",
      "23:47:34 [DEBUG] train episode 541: reward = -12.00, steps = 1665\n",
      "23:56:13 [DEBUG] train episode 542: reward = -13.00, steps = 1581\n",
      "00:02:36 [DEBUG] train episode 543: reward = -18.00, steps = 1118\n",
      "00:07:40 [DEBUG] train episode 544: reward = -19.00, steps = 997\n",
      "00:12:51 [DEBUG] train episode 545: reward = -18.00, steps = 1022\n",
      "00:18:37 [DEBUG] train episode 546: reward = -18.00, steps = 1172\n",
      "00:22:32 [DEBUG] train episode 547: reward = -21.00, steps = 781\n",
      "00:27:13 [DEBUG] train episode 548: reward = -19.00, steps = 996\n",
      "00:32:53 [DEBUG] train episode 549: reward = -18.00, steps = 1193\n",
      "00:39:58 [DEBUG] train episode 550: reward = -16.00, steps = 1364\n",
      "00:45:24 [DEBUG] train episode 551: reward = -18.00, steps = 1181\n",
      "00:52:00 [DEBUG] train episode 552: reward = -15.00, steps = 1449\n",
      "00:58:45 [DEBUG] train episode 553: reward = -13.00, steps = 1483\n",
      "01:07:22 [DEBUG] train episode 554: reward = -10.00, steps = 1897\n",
      "01:13:04 [DEBUG] train episode 555: reward = -16.00, steps = 1259\n",
      "01:17:32 [DEBUG] train episode 556: reward = -20.00, steps = 980\n",
      "01:23:49 [DEBUG] train episode 557: reward = -16.00, steps = 1369\n",
      "01:29:39 [DEBUG] train episode 558: reward = -16.00, steps = 1278\n",
      "01:38:27 [DEBUG] train episode 559: reward = -11.00, steps = 1932\n",
      "01:47:50 [DEBUG] train episode 560: reward = -7.00, steps = 2062\n",
      "01:56:49 [DEBUG] train episode 561: reward = -8.00, steps = 1983\n",
      "02:03:08 [DEBUG] train episode 562: reward = -16.00, steps = 1384\n",
      "02:09:22 [DEBUG] train episode 563: reward = -15.00, steps = 1371\n",
      "02:14:37 [DEBUG] train episode 564: reward = -20.00, steps = 1157\n",
      "02:20:36 [DEBUG] train episode 565: reward = -16.00, steps = 1313\n",
      "02:25:20 [DEBUG] train episode 566: reward = -19.00, steps = 1021\n",
      "02:30:39 [DEBUG] train episode 567: reward = -18.00, steps = 1165\n",
      "02:36:58 [DEBUG] train episode 568: reward = -17.00, steps = 1382\n",
      "02:46:03 [DEBUG] train episode 569: reward = -10.00, steps = 1990\n",
      "02:53:13 [DEBUG] train episode 570: reward = -15.00, steps = 1574\n",
      "02:57:53 [DEBUG] train episode 571: reward = -19.00, steps = 1024\n",
      "03:04:26 [DEBUG] train episode 572: reward = -15.00, steps = 1434\n",
      "03:11:25 [DEBUG] train episode 573: reward = -15.00, steps = 1526\n",
      "03:17:56 [DEBUG] train episode 574: reward = -16.00, steps = 1428\n",
      "03:24:38 [DEBUG] train episode 575: reward = -16.00, steps = 1399\n",
      "03:34:16 [DEBUG] train episode 576: reward = -8.00, steps = 2075\n",
      "03:43:19 [DEBUG] train episode 577: reward = -9.00, steps = 1956\n",
      "03:49:49 [DEBUG] train episode 578: reward = -17.00, steps = 1404\n",
      "03:56:03 [DEBUG] train episode 579: reward = -16.00, steps = 1346\n",
      "04:03:23 [DEBUG] train episode 580: reward = -14.00, steps = 1575\n",
      "04:11:14 [DEBUG] train episode 581: reward = -12.00, steps = 1699\n",
      "04:17:01 [DEBUG] train episode 582: reward = -16.00, steps = 1243\n",
      "04:24:24 [DEBUG] train episode 583: reward = -14.00, steps = 1534\n",
      "04:28:59 [DEBUG] train episode 584: reward = -19.00, steps = 992\n",
      "04:36:56 [DEBUG] train episode 585: reward = -13.00, steps = 1712\n",
      "04:41:32 [DEBUG] train episode 586: reward = -19.00, steps = 996\n",
      "04:49:55 [DEBUG] train episode 587: reward = -10.00, steps = 1823\n",
      "04:55:08 [DEBUG] train episode 588: reward = -20.00, steps = 1127\n",
      "05:01:20 [DEBUG] train episode 589: reward = -16.00, steps = 1339\n",
      "05:06:23 [DEBUG] train episode 590: reward = -19.00, steps = 1090\n",
      "05:13:52 [DEBUG] train episode 591: reward = -13.00, steps = 1622\n",
      "05:20:41 [DEBUG] train episode 592: reward = -14.00, steps = 1477\n",
      "05:26:58 [DEBUG] train episode 593: reward = -15.00, steps = 1338\n",
      "05:34:42 [DEBUG] train episode 594: reward = -13.00, steps = 1657\n",
      "05:39:49 [DEBUG] train episode 595: reward = -18.00, steps = 1109\n",
      "05:47:07 [DEBUG] train episode 596: reward = -13.00, steps = 1577\n",
      "05:51:51 [DEBUG] train episode 597: reward = -19.00, steps = 1024\n",
      "05:57:25 [DEBUG] train episode 598: reward = -17.00, steps = 1208\n",
      "06:03:24 [DEBUG] train episode 599: reward = -16.00, steps = 1286\n",
      "06:10:11 [DEBUG] train episode 600: reward = -15.00, steps = 1464\n",
      "06:15:28 [DEBUG] train episode 601: reward = -17.00, steps = 1143\n",
      "06:19:59 [DEBUG] train episode 602: reward = -20.00, steps = 974\n",
      "06:25:16 [DEBUG] train episode 603: reward = -18.00, steps = 1123\n",
      "06:31:10 [DEBUG] train episode 604: reward = -18.00, steps = 1265\n",
      "06:37:13 [DEBUG] train episode 605: reward = -16.00, steps = 1302\n",
      "06:42:31 [DEBUG] train episode 606: reward = -18.00, steps = 1144\n",
      "06:47:40 [DEBUG] train episode 607: reward = -18.00, steps = 1109\n",
      "06:53:39 [DEBUG] train episode 608: reward = -15.00, steps = 1293\n",
      "06:59:27 [DEBUG] train episode 609: reward = -17.00, steps = 1247\n",
      "07:05:10 [DEBUG] train episode 610: reward = -16.00, steps = 1232\n",
      "07:11:10 [DEBUG] train episode 611: reward = -16.00, steps = 1294\n",
      "07:19:51 [DEBUG] train episode 612: reward = -11.00, steps = 1869\n",
      "07:26:25 [DEBUG] train episode 613: reward = -14.00, steps = 1404\n",
      "07:31:19 [DEBUG] train episode 614: reward = -19.00, steps = 1050\n",
      "07:37:24 [DEBUG] train episode 615: reward = -18.00, steps = 1312\n",
      "07:42:18 [DEBUG] train episode 616: reward = -20.00, steps = 1011\n",
      "07:47:01 [DEBUG] train episode 617: reward = -19.00, steps = 1022\n",
      "07:53:12 [DEBUG] train episode 618: reward = -16.00, steps = 1340\n",
      "08:01:57 [DEBUG] train episode 619: reward = -10.00, steps = 1900\n",
      "08:08:01 [DEBUG] train episode 620: reward = -18.00, steps = 1316\n",
      "08:15:09 [DEBUG] train episode 621: reward = -14.00, steps = 1556\n",
      "08:22:47 [DEBUG] train episode 622: reward = -10.00, steps = 1868\n",
      "08:30:33 [DEBUG] train episode 623: reward = -13.00, steps = 1680\n",
      "08:37:50 [DEBUG] train episode 624: reward = -13.00, steps = 1580\n",
      "08:45:22 [DEBUG] train episode 625: reward = -15.00, steps = 1639\n",
      "08:52:44 [DEBUG] train episode 626: reward = -12.00, steps = 1603\n",
      "08:59:31 [DEBUG] train episode 627: reward = -14.00, steps = 1474\n",
      "09:07:39 [DEBUG] train episode 628: reward = -11.00, steps = 1764\n",
      "09:13:04 [DEBUG] train episode 629: reward = -17.00, steps = 1172\n",
      "09:19:53 [DEBUG] train episode 630: reward = -14.00, steps = 1480\n",
      "09:27:59 [DEBUG] train episode 631: reward = -13.00, steps = 1659\n",
      "09:33:41 [DEBUG] train episode 632: reward = -18.00, steps = 1066\n",
      "09:40:16 [DEBUG] train episode 633: reward = -18.00, steps = 1334\n",
      "09:48:37 [DEBUG] train episode 634: reward = -14.00, steps = 1751\n",
      "09:52:55 [DEBUG] train episode 635: reward = -21.00, steps = 851\n",
      "09:58:32 [DEBUG] train episode 636: reward = -19.00, steps = 1105\n",
      "10:05:50 [DEBUG] train episode 637: reward = -14.00, steps = 1410\n",
      "10:11:59 [DEBUG] train episode 638: reward = -17.00, steps = 1201\n",
      "10:16:43 [DEBUG] train episode 639: reward = -20.00, steps = 954\n",
      "10:21:37 [DEBUG] train episode 640: reward = -19.00, steps = 993\n",
      "10:26:36 [DEBUG] train episode 641: reward = -19.00, steps = 970\n",
      "10:33:29 [DEBUG] train episode 642: reward = -15.00, steps = 1359\n",
      "10:38:42 [DEBUG] train episode 643: reward = -19.00, steps = 1038\n",
      "10:44:03 [DEBUG] train episode 644: reward = -19.00, steps = 1072\n",
      "10:49:23 [DEBUG] train episode 645: reward = -19.00, steps = 1074\n",
      "10:54:54 [DEBUG] train episode 646: reward = -18.00, steps = 1116\n",
      "10:59:49 [DEBUG] train episode 647: reward = -19.00, steps = 993\n",
      "11:03:55 [DEBUG] train episode 648: reward = -20.00, steps = 835\n",
      "11:09:09 [DEBUG] train episode 649: reward = -20.00, steps = 1047\n",
      "11:14:30 [DEBUG] train episode 650: reward = -18.00, steps = 1093\n",
      "11:20:05 [DEBUG] train episode 651: reward = -18.00, steps = 1134\n",
      "11:24:26 [DEBUG] train episode 652: reward = -20.00, steps = 883\n",
      "11:29:05 [DEBUG] train episode 653: reward = -20.00, steps = 953\n",
      "11:34:57 [DEBUG] train episode 654: reward = -17.00, steps = 1217\n",
      "11:40:20 [DEBUG] train episode 655: reward = -18.00, steps = 1131\n",
      "11:45:49 [DEBUG] train episode 656: reward = -18.00, steps = 1149\n",
      "11:52:44 [DEBUG] train episode 657: reward = -15.00, steps = 1421\n",
      "11:58:49 [DEBUG] train episode 658: reward = -17.00, steps = 1269\n",
      "12:04:42 [DEBUG] train episode 659: reward = -18.00, steps = 1219\n",
      "12:09:19 [DEBUG] train episode 660: reward = -19.00, steps = 962\n",
      "12:15:25 [DEBUG] train episode 661: reward = -16.00, steps = 1278\n",
      "12:21:33 [DEBUG] train episode 662: reward = -16.00, steps = 1279\n",
      "12:28:13 [DEBUG] train episode 663: reward = -15.00, steps = 1397\n",
      "12:33:57 [DEBUG] train episode 664: reward = -17.00, steps = 1204\n",
      "12:39:27 [DEBUG] train episode 665: reward = -18.00, steps = 1161\n",
      "12:47:14 [DEBUG] train episode 666: reward = -13.00, steps = 1630\n",
      "12:54:19 [DEBUG] train episode 667: reward = -14.00, steps = 1473\n",
      "13:01:30 [DEBUG] train episode 668: reward = -17.00, steps = 1491\n",
      "13:07:40 [DEBUG] train episode 669: reward = -17.00, steps = 1287\n",
      "13:13:37 [DEBUG] train episode 670: reward = -17.00, steps = 1253\n",
      "13:21:22 [DEBUG] train episode 671: reward = -13.00, steps = 1605\n",
      "13:27:04 [DEBUG] train episode 672: reward = -18.00, steps = 1190\n",
      "13:32:50 [DEBUG] train episode 673: reward = -18.00, steps = 1203\n",
      "13:38:50 [DEBUG] train episode 674: reward = -16.00, steps = 1260\n",
      "13:43:49 [DEBUG] train episode 675: reward = -20.00, steps = 1045\n",
      "13:48:40 [DEBUG] train episode 676: reward = -20.00, steps = 1007\n",
      "13:56:07 [DEBUG] train episode 677: reward = -14.00, steps = 1560\n",
      "14:00:46 [DEBUG] train episode 678: reward = -20.00, steps = 973\n",
      "14:06:24 [DEBUG] train episode 679: reward = -18.00, steps = 1172\n",
      "14:10:44 [DEBUG] train episode 680: reward = -21.00, steps = 907\n",
      "14:17:58 [DEBUG] train episode 681: reward = -13.00, steps = 1515\n",
      "14:25:40 [DEBUG] train episode 682: reward = -12.00, steps = 1602\n",
      "14:31:10 [DEBUG] train episode 683: reward = -18.00, steps = 1144\n",
      "14:36:16 [DEBUG] train episode 684: reward = -18.00, steps = 1066\n",
      "14:43:10 [DEBUG] train episode 685: reward = -14.00, steps = 1437\n",
      "14:51:22 [DEBUG] train episode 686: reward = -11.00, steps = 1712\n",
      "14:57:26 [DEBUG] train episode 687: reward = -16.00, steps = 1263\n",
      "15:03:05 [DEBUG] train episode 688: reward = -18.00, steps = 1151\n",
      "15:08:57 [DEBUG] train episode 689: reward = -18.00, steps = 1117\n",
      "15:14:59 [DEBUG] train episode 690: reward = -19.00, steps = 1078\n",
      "15:27:00 [DEBUG] train episode 691: reward = -3.00, steps = 2417\n",
      "15:32:57 [DEBUG] train episode 692: reward = -19.00, steps = 1232\n",
      "15:37:43 [DEBUG] train episode 693: reward = -21.00, steps = 1001\n",
      "15:42:49 [DEBUG] train episode 694: reward = -19.00, steps = 1053\n",
      "15:49:54 [DEBUG] train episode 695: reward = -16.00, steps = 1450\n",
      "15:56:04 [DEBUG] train episode 696: reward = -17.00, steps = 1291\n",
      "16:01:28 [DEBUG] train episode 697: reward = -18.00, steps = 1136\n",
      "16:08:21 [DEBUG] train episode 698: reward = -15.00, steps = 1440\n",
      "16:14:27 [DEBUG] train episode 699: reward = -16.00, steps = 1275\n",
      "16:19:19 [DEBUG] train episode 700: reward = -19.00, steps = 1023\n",
      "16:24:53 [DEBUG] train episode 701: reward = -16.00, steps = 1156\n",
      "16:31:01 [DEBUG] train episode 702: reward = -18.00, steps = 1282\n",
      "16:38:46 [DEBUG] train episode 703: reward = -15.00, steps = 1527\n",
      "16:52:17 [DEBUG] train episode 704: reward = -14.00, steps = 1474\n",
      "17:00:16 [DEBUG] train episode 705: reward = -19.00, steps = 1160\n",
      "17:07:36 [DEBUG] train episode 706: reward = -16.00, steps = 1391\n",
      "17:13:28 [DEBUG] train episode 707: reward = -19.00, steps = 1189\n",
      "17:19:43 [DEBUG] train episode 708: reward = -19.00, steps = 1262\n",
      "17:25:19 [DEBUG] train episode 709: reward = -18.00, steps = 1112\n",
      "17:31:41 [DEBUG] train episode 710: reward = -16.00, steps = 1276\n",
      "17:38:04 [DEBUG] train episode 711: reward = -18.00, steps = 1284\n",
      "17:45:34 [DEBUG] train episode 712: reward = -15.00, steps = 1525\n",
      "17:51:24 [DEBUG] train episode 713: reward = -18.00, steps = 1188\n",
      "17:57:28 [DEBUG] train episode 714: reward = -16.00, steps = 1230\n",
      "18:03:14 [DEBUG] train episode 715: reward = -17.00, steps = 1136\n",
      "18:08:19 [DEBUG] train episode 716: reward = -20.00, steps = 993\n",
      "18:15:13 [DEBUG] train episode 717: reward = -15.00, steps = 1370\n",
      "18:20:11 [DEBUG] train episode 718: reward = -19.00, steps = 993\n",
      "18:26:12 [DEBUG] train episode 719: reward = -17.00, steps = 1197\n",
      "18:31:26 [DEBUG] train episode 720: reward = -18.00, steps = 1055\n",
      "18:36:52 [DEBUG] train episode 721: reward = -19.00, steps = 1099\n",
      "18:42:00 [DEBUG] train episode 722: reward = -20.00, steps = 1030\n",
      "18:47:38 [DEBUG] train episode 723: reward = -18.00, steps = 1143\n",
      "18:52:29 [DEBUG] train episode 724: reward = -19.00, steps = 993\n",
      "18:58:51 [DEBUG] train episode 725: reward = -16.00, steps = 1295\n",
      "19:05:11 [DEBUG] train episode 726: reward = -20.00, steps = 886\n",
      "19:17:12 [DEBUG] train episode 727: reward = -19.00, steps = 1046\n",
      "19:27:25 [DEBUG] train episode 728: reward = -12.00, steps = 1573\n",
      "19:36:24 [DEBUG] train episode 729: reward = -13.00, steps = 1485\n",
      "19:42:18 [DEBUG] train episode 730: reward = -17.00, steps = 1122\n",
      "19:47:44 [DEBUG] train episode 731: reward = -19.00, steps = 1075\n",
      "19:54:48 [DEBUG] train episode 732: reward = -14.00, steps = 1445\n",
      "20:00:08 [DEBUG] train episode 733: reward = -19.00, steps = 1099\n",
      "20:07:35 [DEBUG] train episode 734: reward = -15.00, steps = 1452\n",
      "20:11:45 [DEBUG] train episode 735: reward = -20.00, steps = 831\n",
      "20:17:05 [DEBUG] train episode 736: reward = -18.00, steps = 1070\n",
      "20:25:42 [DEBUG] train episode 737: reward = -10.00, steps = 1730\n",
      "20:34:27 [DEBUG] train episode 738: reward = -11.00, steps = 1816\n",
      "20:40:10 [DEBUG] train episode 739: reward = -17.00, steps = 1157\n",
      "20:49:05 [DEBUG] train episode 740: reward = -13.00, steps = 1558\n",
      "20:56:17 [DEBUG] train episode 741: reward = -17.00, steps = 1333\n",
      "21:02:45 [DEBUG] train episode 742: reward = -18.00, steps = 1158\n",
      "21:08:00 [DEBUG] train episode 743: reward = -19.00, steps = 1006\n",
      "21:15:07 [DEBUG] train episode 744: reward = -19.00, steps = 1068\n",
      "21:26:59 [DEBUG] train episode 745: reward = -13.00, steps = 1504\n",
      "21:36:43 [DEBUG] train episode 746: reward = -18.00, steps = 1304\n",
      "21:42:26 [DEBUG] train episode 747: reward = -21.00, steps = 792\n",
      "21:52:36 [DEBUG] train episode 748: reward = -16.00, steps = 1280\n",
      "22:06:00 [DEBUG] train episode 749: reward = -10.00, steps = 1918\n",
      "22:15:42 [DEBUG] train episode 750: reward = -15.00, steps = 1359\n",
      "22:24:21 [DEBUG] train episode 751: reward = -16.00, steps = 1210\n",
      "22:32:32 [DEBUG] train episode 752: reward = -14.00, steps = 1531\n",
      "22:37:53 [DEBUG] train episode 753: reward = -18.00, steps = 1103\n",
      "22:45:31 [DEBUG] train episode 754: reward = -13.00, steps = 1533\n",
      "22:51:04 [DEBUG] train episode 755: reward = -18.00, steps = 1090\n",
      "22:58:24 [DEBUG] train episode 756: reward = -14.00, steps = 1416\n",
      "23:03:05 [DEBUG] train episode 757: reward = -19.00, steps = 968\n",
      "23:07:19 [DEBUG] train episode 758: reward = -21.00, steps = 882\n",
      "23:16:33 [DEBUG] train episode 759: reward = -10.00, steps = 1909\n",
      "23:22:13 [DEBUG] train episode 760: reward = -19.00, steps = 1167\n",
      "23:27:31 [DEBUG] train episode 761: reward = -18.00, steps = 1105\n",
      "23:32:41 [DEBUG] train episode 762: reward = -19.00, steps = 1067\n",
      "23:37:28 [DEBUG] train episode 763: reward = -19.00, steps = 995\n",
      "23:44:10 [DEBUG] train episode 764: reward = -15.00, steps = 1392\n",
      "23:48:59 [DEBUG] train episode 765: reward = -18.00, steps = 1009\n",
      "23:53:39 [DEBUG] train episode 766: reward = -20.00, steps = 980\n",
      "00:00:09 [DEBUG] train episode 767: reward = -17.00, steps = 1338\n",
      "00:07:40 [DEBUG] train episode 768: reward = -14.00, steps = 1561\n",
      "00:13:52 [DEBUG] train episode 769: reward = -17.00, steps = 1266\n",
      "00:23:49 [DEBUG] train episode 770: reward = -10.00, steps = 1968\n",
      "00:29:16 [DEBUG] train episode 771: reward = -18.00, steps = 1157\n",
      "00:36:33 [DEBUG] train episode 772: reward = -14.00, steps = 1571\n",
      "00:40:49 [DEBUG] train episode 773: reward = -20.00, steps = 925\n",
      "00:44:28 [DEBUG] train episode 774: reward = -21.00, steps = 785\n",
      "00:48:30 [DEBUG] train episode 775: reward = -20.00, steps = 870\n",
      "00:53:58 [DEBUG] train episode 776: reward = -19.00, steps = 1179\n",
      "00:59:19 [DEBUG] train episode 777: reward = -18.00, steps = 1146\n",
      "01:03:17 [DEBUG] train episode 778: reward = -21.00, steps = 842\n",
      "01:11:54 [DEBUG] train episode 779: reward = -10.00, steps = 1844\n",
      "01:16:16 [DEBUG] train episode 780: reward = -21.00, steps = 933\n",
      "01:22:53 [DEBUG] train episode 781: reward = -14.00, steps = 1410\n",
      "01:28:32 [DEBUG] train episode 782: reward = -18.00, steps = 1209\n",
      "01:35:55 [DEBUG] train episode 783: reward = -14.00, steps = 1569\n",
      "01:44:05 [DEBUG] train episode 784: reward = -10.00, steps = 1760\n",
      "01:50:08 [DEBUG] train episode 785: reward = -16.00, steps = 1299\n",
      "01:56:06 [DEBUG] train episode 786: reward = -16.00, steps = 1282\n",
      "02:03:51 [DEBUG] train episode 787: reward = -13.00, steps = 1656\n",
      "02:09:21 [DEBUG] train episode 788: reward = -16.00, steps = 1174\n",
      "02:19:57 [DEBUG] train episode 789: reward = -6.00, steps = 2265\n",
      "02:26:17 [DEBUG] train episode 790: reward = -17.00, steps = 1332\n",
      "02:31:11 [DEBUG] train episode 791: reward = -18.00, steps = 1043\n",
      "02:37:39 [DEBUG] train episode 792: reward = -15.00, steps = 1378\n",
      "02:44:16 [DEBUG] train episode 793: reward = -15.00, steps = 1401\n",
      "02:49:21 [DEBUG] train episode 794: reward = -19.00, steps = 1041\n",
      "02:55:33 [DEBUG] train episode 795: reward = -15.00, steps = 1323\n",
      "02:59:57 [DEBUG] train episode 796: reward = -19.00, steps = 935\n",
      "03:05:14 [DEBUG] train episode 797: reward = -17.00, steps = 1120\n",
      "03:09:36 [DEBUG] train episode 798: reward = -21.00, steps = 925\n",
      "03:14:50 [DEBUG] train episode 799: reward = -18.00, steps = 1120\n",
      "03:18:45 [DEBUG] train episode 800: reward = -20.00, steps = 835\n",
      "03:25:29 [DEBUG] train episode 801: reward = -16.00, steps = 1424\n",
      "03:29:39 [DEBUG] train episode 802: reward = -21.00, steps = 883\n",
      "03:36:57 [DEBUG] train episode 803: reward = -13.00, steps = 1552\n",
      "03:44:52 [DEBUG] train episode 804: reward = -13.00, steps = 1684\n",
      "03:48:58 [DEBUG] train episode 805: reward = -21.00, steps = 871\n",
      "03:56:31 [DEBUG] train episode 806: reward = -12.00, steps = 1605\n",
      "04:02:01 [DEBUG] train episode 807: reward = -18.00, steps = 1156\n",
      "04:07:24 [DEBUG] train episode 808: reward = -19.00, steps = 1084\n",
      "04:16:20 [DEBUG] train episode 809: reward = -11.00, steps = 1872\n",
      "04:21:23 [DEBUG] train episode 810: reward = -18.00, steps = 1060\n",
      "04:26:13 [DEBUG] train episode 811: reward = -19.00, steps = 1020\n",
      "04:34:49 [DEBUG] train episode 812: reward = -11.00, steps = 1822\n",
      "04:38:28 [DEBUG] train episode 813: reward = -21.00, steps = 776\n",
      "04:44:31 [DEBUG] train episode 814: reward = -18.00, steps = 1286\n",
      "04:48:26 [DEBUG] train episode 815: reward = -20.00, steps = 836\n",
      "04:53:07 [DEBUG] train episode 816: reward = -21.00, steps = 1001\n",
      "04:57:26 [DEBUG] train episode 817: reward = -20.00, steps = 926\n",
      "05:02:34 [DEBUG] train episode 818: reward = -18.00, steps = 1094\n",
      "05:07:16 [DEBUG] train episode 819: reward = -20.00, steps = 1004\n",
      "05:11:27 [DEBUG] train episode 820: reward = -20.00, steps = 892\n",
      "05:15:23 [DEBUG] train episode 821: reward = -21.00, steps = 837\n",
      "05:23:31 [DEBUG] train episode 822: reward = -12.00, steps = 1714\n",
      "05:28:15 [DEBUG] train episode 823: reward = -19.00, steps = 1011\n",
      "05:33:31 [DEBUG] train episode 824: reward = -17.00, steps = 1118\n",
      "05:38:16 [DEBUG] train episode 825: reward = -18.00, steps = 1016\n",
      "05:46:36 [DEBUG] train episode 826: reward = -11.00, steps = 1786\n",
      "05:51:52 [DEBUG] train episode 827: reward = -19.00, steps = 1126\n",
      "05:57:50 [DEBUG] train episode 828: reward = -18.00, steps = 1275\n",
      "06:02:43 [DEBUG] train episode 829: reward = -19.00, steps = 1042\n",
      "06:09:54 [DEBUG] train episode 830: reward = -13.00, steps = 1530\n",
      "06:13:52 [DEBUG] train episode 831: reward = -21.00, steps = 849\n",
      "06:19:36 [DEBUG] train episode 832: reward = -18.00, steps = 1225\n",
      "06:24:09 [DEBUG] train episode 833: reward = -19.00, steps = 936\n",
      "06:30:09 [DEBUG] train episode 834: reward = -16.00, steps = 1271\n",
      "06:37:17 [DEBUG] train episode 835: reward = -13.00, steps = 1527\n",
      "06:42:18 [DEBUG] train episode 836: reward = -18.00, steps = 1076\n",
      "06:48:36 [DEBUG] train episode 837: reward = -16.00, steps = 1351\n",
      "06:54:02 [DEBUG] train episode 838: reward = -18.00, steps = 1160\n",
      "06:58:46 [DEBUG] train episode 839: reward = -19.00, steps = 1017\n",
      "07:05:02 [DEBUG] train episode 840: reward = -18.00, steps = 1334\n",
      "07:09:35 [DEBUG] train episode 841: reward = -19.00, steps = 979\n",
      "07:14:52 [DEBUG] train episode 842: reward = -18.00, steps = 1130\n",
      "07:19:30 [DEBUG] train episode 843: reward = -19.00, steps = 992\n",
      "07:25:18 [DEBUG] train episode 844: reward = -16.00, steps = 1217\n",
      "07:30:22 [DEBUG] train episode 845: reward = -19.00, steps = 1080\n",
      "07:36:46 [DEBUG] train episode 846: reward = -17.00, steps = 1368\n",
      "07:41:23 [DEBUG] train episode 847: reward = -19.00, steps = 988\n",
      "07:45:56 [DEBUG] train episode 848: reward = -19.00, steps = 975\n",
      "07:51:40 [DEBUG] train episode 849: reward = -16.00, steps = 1227\n",
      "08:00:05 [DEBUG] train episode 850: reward = -10.00, steps = 1792\n",
      "08:06:50 [DEBUG] train episode 851: reward = -15.00, steps = 1434\n",
      "08:13:23 [DEBUG] train episode 852: reward = -14.00, steps = 1388\n",
      "08:17:15 [DEBUG] train episode 853: reward = -21.00, steps = 819\n",
      "08:21:41 [DEBUG] train episode 854: reward = -19.00, steps = 925\n",
      "08:27:13 [DEBUG] train episode 855: reward = -20.00, steps = 1005\n",
      "08:33:35 [DEBUG] train episode 856: reward = -17.00, steps = 1273\n",
      "08:39:12 [DEBUG] train episode 857: reward = -18.00, steps = 1151\n",
      "08:44:46 [DEBUG] train episode 858: reward = -19.00, steps = 1095\n",
      "08:49:44 [DEBUG] train episode 859: reward = -19.00, steps = 995\n",
      "08:53:52 [DEBUG] train episode 860: reward = -20.00, steps = 857\n",
      "09:01:36 [DEBUG] train episode 861: reward = -14.00, steps = 1596\n",
      "09:08:25 [DEBUG] train episode 862: reward = -15.00, steps = 1392\n",
      "09:13:31 [DEBUG] train episode 863: reward = -18.00, steps = 1050\n",
      "09:22:04 [DEBUG] train episode 864: reward = -12.00, steps = 1761\n",
      "09:27:53 [DEBUG] train episode 865: reward = -19.00, steps = 1212\n",
      "09:33:14 [DEBUG] train episode 866: reward = -19.00, steps = 1112\n",
      "09:39:09 [DEBUG] train episode 867: reward = -17.00, steps = 1224\n",
      "09:43:58 [DEBUG] train episode 868: reward = -21.00, steps = 1006\n",
      "09:51:51 [DEBUG] train episode 869: reward = -13.00, steps = 1648\n",
      "09:57:40 [DEBUG] train episode 870: reward = -17.00, steps = 1205\n",
      "10:05:21 [DEBUG] train episode 871: reward = -14.00, steps = 1567\n",
      "10:11:48 [DEBUG] train episode 872: reward = -16.00, steps = 1319\n",
      "10:18:57 [DEBUG] train episode 873: reward = -15.00, steps = 1475\n",
      "10:26:55 [DEBUG] train episode 874: reward = -14.00, steps = 1636\n",
      "10:34:06 [DEBUG] train episode 875: reward = -16.00, steps = 1476\n",
      "10:39:29 [DEBUG] train episode 876: reward = -19.00, steps = 1114\n",
      "10:43:44 [DEBUG] train episode 877: reward = -20.00, steps = 881\n",
      "10:49:47 [DEBUG] train episode 878: reward = -17.00, steps = 1258\n",
      "10:55:53 [DEBUG] train episode 879: reward = -17.00, steps = 1267\n",
      "11:02:13 [DEBUG] train episode 880: reward = -18.00, steps = 1307\n",
      "11:06:51 [DEBUG] train episode 881: reward = -19.00, steps = 961\n",
      "11:12:53 [DEBUG] train episode 882: reward = -16.00, steps = 1243\n",
      "11:23:29 [DEBUG] train episode 883: reward = -8.00, steps = 2183\n",
      "11:29:06 [DEBUG] train episode 884: reward = -17.00, steps = 1158\n",
      "11:35:16 [DEBUG] train episode 885: reward = -18.00, steps = 1272\n",
      "11:40:53 [DEBUG] train episode 886: reward = -20.00, steps = 1170\n",
      "11:53:03 [DEBUG] train episode 887: reward = -3.00, steps = 2533\n",
      "11:59:59 [DEBUG] train episode 888: reward = -14.00, steps = 1448\n",
      "12:05:39 [DEBUG] train episode 889: reward = -17.00, steps = 1185\n",
      "12:12:01 [DEBUG] train episode 890: reward = -15.00, steps = 1314\n",
      "12:16:09 [DEBUG] train episode 891: reward = -20.00, steps = 855\n",
      "12:24:08 [DEBUG] train episode 892: reward = -11.00, steps = 1632\n",
      "12:30:03 [DEBUG] train episode 893: reward = -18.00, steps = 1223\n",
      "12:38:07 [DEBUG] train episode 894: reward = -12.00, steps = 1676\n",
      "12:44:28 [DEBUG] train episode 895: reward = -16.00, steps = 1318\n",
      "12:51:40 [DEBUG] train episode 896: reward = -14.00, steps = 1494\n",
      "13:00:38 [DEBUG] train episode 897: reward = -11.00, steps = 1859\n",
      "13:10:19 [DEBUG] train episode 898: reward = -9.00, steps = 2002\n",
      "13:14:31 [DEBUG] train episode 899: reward = -20.00, steps = 859\n",
      "13:19:55 [DEBUG] train episode 900: reward = -21.00, steps = 962\n",
      "13:29:21 [DEBUG] train episode 901: reward = -13.00, steps = 1519\n",
      "13:37:10 [DEBUG] train episode 902: reward = -18.00, steps = 1261\n",
      "13:43:40 [DEBUG] train episode 903: reward = -18.00, steps = 1112\n",
      "13:51:07 [DEBUG] train episode 904: reward = -16.00, steps = 1307\n",
      "14:03:48 [DEBUG] train episode 905: reward = -4.00, steps = 2456\n",
      "14:08:53 [DEBUG] train episode 906: reward = -19.00, steps = 1015\n",
      "14:16:41 [DEBUG] train episode 907: reward = -15.00, steps = 1577\n",
      "14:25:20 [DEBUG] train episode 908: reward = -12.00, steps = 1755\n",
      "14:31:48 [DEBUG] train episode 909: reward = -15.00, steps = 1321\n",
      "14:38:02 [DEBUG] train episode 910: reward = -16.00, steps = 1282\n",
      "14:42:01 [DEBUG] train episode 911: reward = -21.00, steps = 820\n",
      "14:47:57 [DEBUG] train episode 912: reward = -17.00, steps = 1231\n",
      "14:52:46 [DEBUG] train episode 913: reward = -21.00, steps = 997\n",
      "14:58:21 [DEBUG] train episode 914: reward = -20.00, steps = 1160\n",
      "15:05:37 [DEBUG] train episode 915: reward = -13.00, steps = 1505\n",
      "15:16:08 [DEBUG] train episode 916: reward = -7.00, steps = 2172\n",
      "15:21:29 [DEBUG] train episode 917: reward = -18.00, steps = 1085\n",
      "15:34:43 [DEBUG] train episode 918: reward = 1.00, steps = 2727\n",
      "15:41:59 [DEBUG] train episode 919: reward = -14.00, steps = 1506\n",
      "15:49:29 [DEBUG] train episode 920: reward = -15.00, steps = 1557\n",
      "15:54:42 [DEBUG] train episode 921: reward = -18.00, steps = 1086\n",
      "15:59:12 [DEBUG] train episode 922: reward = -19.00, steps = 934\n",
      "16:05:58 [DEBUG] train episode 923: reward = -16.00, steps = 1398\n",
      "16:18:03 [DEBUG] train episode 924: reward = -3.00, steps = 2503\n",
      "16:27:17 [DEBUG] train episode 925: reward = -9.00, steps = 1900\n",
      "16:38:16 [DEBUG] train episode 926: reward = -6.00, steps = 2269\n",
      "16:43:38 [DEBUG] train episode 927: reward = -17.00, steps = 1110\n",
      "16:48:57 [DEBUG] train episode 928: reward = -18.00, steps = 1105\n",
      "17:00:53 [DEBUG] train episode 929: reward = -4.00, steps = 2474\n",
      "17:06:55 [DEBUG] train episode 930: reward = -16.00, steps = 1242\n",
      "17:13:51 [DEBUG] train episode 931: reward = -14.00, steps = 1419\n",
      "17:20:12 [DEBUG] train episode 932: reward = -16.00, steps = 1317\n",
      "17:27:38 [DEBUG] train episode 933: reward = -14.00, steps = 1531\n",
      "17:33:55 [DEBUG] train episode 934: reward = -16.00, steps = 1295\n",
      "17:41:02 [DEBUG] train episode 935: reward = -14.00, steps = 1464\n",
      "17:52:03 [DEBUG] train episode 936: reward = -6.00, steps = 2291\n",
      "17:56:15 [DEBUG] train episode 937: reward = -20.00, steps = 873\n",
      "18:06:17 [DEBUG] train episode 938: reward = -7.00, steps = 2063\n",
      "18:12:42 [DEBUG] train episode 939: reward = -15.00, steps = 1317\n",
      "18:25:37 [DEBUG] train episode 940: reward = 1.00, steps = 2672\n",
      "18:34:36 [DEBUG] train episode 941: reward = -9.00, steps = 1848\n",
      "18:39:03 [DEBUG] train episode 942: reward = -19.00, steps = 925\n",
      "18:44:39 [DEBUG] train episode 943: reward = -18.00, steps = 1171\n",
      "18:50:39 [DEBUG] train episode 944: reward = -18.00, steps = 1262\n",
      "19:01:29 [DEBUG] train episode 945: reward = -4.00, steps = 2260\n",
      "19:06:38 [DEBUG] train episode 946: reward = -18.00, steps = 1076\n",
      "19:11:02 [DEBUG] train episode 947: reward = -19.00, steps = 930\n",
      "19:17:33 [DEBUG] train episode 948: reward = -15.00, steps = 1368\n",
      "19:24:49 [DEBUG] train episode 949: reward = -14.00, steps = 1512\n",
      "19:29:44 [DEBUG] train episode 950: reward = -18.00, steps = 1024\n",
      "19:35:17 [DEBUG] train episode 951: reward = -19.00, steps = 1126\n",
      "19:39:57 [DEBUG] train episode 952: reward = -20.00, steps = 976\n",
      "19:43:45 [DEBUG] train episode 953: reward = -21.00, steps = 791\n",
      "19:49:06 [DEBUG] train episode 954: reward = -18.00, steps = 1126\n",
      "19:54:57 [DEBUG] train episode 955: reward = -16.00, steps = 1232\n",
      "19:59:49 [DEBUG] train episode 956: reward = -18.00, steps = 1023\n",
      "20:03:50 [DEBUG] train episode 957: reward = -20.00, steps = 839\n",
      "20:08:51 [DEBUG] train episode 958: reward = -18.00, steps = 1052\n",
      "20:16:29 [DEBUG] train episode 959: reward = -13.00, steps = 1604\n",
      "20:25:59 [DEBUG] train episode 960: reward = -10.00, steps = 1972\n",
      "20:30:55 [DEBUG] train episode 961: reward = -18.00, steps = 1032\n",
      "20:37:38 [DEBUG] train episode 962: reward = -15.00, steps = 1406\n",
      "20:42:39 [DEBUG] train episode 963: reward = -19.00, steps = 1050\n",
      "20:46:49 [DEBUG] train episode 964: reward = -20.00, steps = 871\n",
      "20:50:33 [DEBUG] train episode 965: reward = -21.00, steps = 782\n",
      "20:56:31 [DEBUG] train episode 966: reward = -16.00, steps = 1245\n",
      "21:00:54 [DEBUG] train episode 967: reward = -20.00, steps = 914\n",
      "21:06:17 [DEBUG] train episode 968: reward = -19.00, steps = 1124\n",
      "21:12:34 [DEBUG] train episode 969: reward = -17.00, steps = 1308\n",
      "21:19:29 [DEBUG] train episode 970: reward = -16.00, steps = 1443\n",
      "21:25:02 [DEBUG] train episode 971: reward = -18.00, steps = 1132\n",
      "21:32:07 [DEBUG] train episode 972: reward = -14.00, steps = 1455\n",
      "21:37:02 [DEBUG] train episode 973: reward = -19.00, steps = 1005\n",
      "21:41:38 [DEBUG] train episode 974: reward = -20.00, steps = 944\n",
      "21:47:54 [DEBUG] train episode 975: reward = -17.00, steps = 1293\n",
      "21:55:27 [DEBUG] train episode 976: reward = -15.00, steps = 1568\n",
      "21:59:51 [DEBUG] train episode 977: reward = -20.00, steps = 900\n",
      "22:05:12 [DEBUG] train episode 978: reward = -19.00, steps = 1092\n",
      "22:11:38 [DEBUG] train episode 979: reward = -16.00, steps = 1328\n",
      "22:17:59 [DEBUG] train episode 980: reward = -16.00, steps = 1300\n",
      "22:26:44 [DEBUG] train episode 981: reward = -12.00, steps = 1774\n",
      "22:35:11 [DEBUG] train episode 982: reward = -12.00, steps = 1723\n",
      "22:40:58 [DEBUG] train episode 983: reward = -18.00, steps = 1190\n",
      "22:47:34 [DEBUG] train episode 984: reward = -18.00, steps = 1348\n",
      "22:57:54 [DEBUG] train episode 985: reward = -8.00, steps = 2131\n",
      "23:06:18 [DEBUG] train episode 986: reward = -13.00, steps = 1741\n",
      "23:10:48 [DEBUG] train episode 987: reward = -19.00, steps = 929\n",
      "23:16:47 [DEBUG] train episode 988: reward = -19.00, steps = 1233\n",
      "23:24:53 [DEBUG] train episode 989: reward = -13.00, steps = 1618\n",
      "23:29:21 [DEBUG] train episode 990: reward = -21.00, steps = 901\n",
      "23:34:43 [DEBUG] train episode 991: reward = -18.00, steps = 1100\n",
      "23:38:22 [DEBUG] train episode 992: reward = -18.00, steps = 1024\n",
      "23:45:17 [DEBUG] train episode 993: reward = -9.00, steps = 1938\n",
      "23:51:00 [DEBUG] train episode 994: reward = -12.00, steps = 1600\n",
      "23:58:47 [DEBUG] train episode 995: reward = -7.00, steps = 2174\n",
      "00:03:37 [DEBUG] train episode 996: reward = -15.00, steps = 1314\n",
      "00:08:45 [DEBUG] train episode 997: reward = -15.00, steps = 1374\n",
      "00:15:39 [DEBUG] train episode 998: reward = -9.00, steps = 1892\n",
      "00:19:37 [DEBUG] train episode 999: reward = -18.00, steps = 1184\n",
      "00:25:48 [DEBUG] train episode 1000: reward = -11.00, steps = 1815\n",
      "00:33:31 [DEBUG] train episode 1001: reward = -6.00, steps = 2287\n",
      "00:37:08 [DEBUG] train episode 1002: reward = -18.00, steps = 1154\n",
      "00:42:52 [DEBUG] train episode 1003: reward = -11.00, steps = 1835\n",
      "00:46:01 [DEBUG] train episode 1004: reward = -19.00, steps = 1006\n",
      "00:49:13 [DEBUG] train episode 1005: reward = -18.00, steps = 1030\n",
      "00:53:51 [DEBUG] train episode 1006: reward = -14.00, steps = 1486\n",
      "00:57:23 [DEBUG] train episode 1007: reward = -19.00, steps = 1128\n",
      "01:00:58 [DEBUG] train episode 1008: reward = -18.00, steps = 1151\n",
      "01:06:28 [DEBUG] train episode 1009: reward = -11.00, steps = 1757\n",
      "01:10:07 [DEBUG] train episode 1010: reward = -18.00, steps = 1167\n",
      "01:14:46 [DEBUG] train episode 1011: reward = -14.00, steps = 1489\n",
      "01:19:36 [DEBUG] train episode 1012: reward = -13.00, steps = 1548\n",
      "01:23:26 [DEBUG] train episode 1013: reward = -19.00, steps = 1215\n",
      "01:32:05 [DEBUG] train episode 1014: reward = -1.00, steps = 2758\n",
      "01:37:56 [DEBUG] train episode 1015: reward = -10.00, steps = 1869\n",
      "01:42:03 [DEBUG] train episode 1016: reward = -16.00, steps = 1311\n",
      "01:47:14 [DEBUG] train episode 1017: reward = -14.00, steps = 1658\n",
      "01:52:02 [DEBUG] train episode 1018: reward = -14.00, steps = 1527\n",
      "01:55:50 [DEBUG] train episode 1019: reward = -16.00, steps = 1213\n",
      "01:59:53 [DEBUG] train episode 1020: reward = -16.00, steps = 1287\n",
      "02:03:51 [DEBUG] train episode 1021: reward = -17.00, steps = 1258\n",
      "02:06:48 [DEBUG] train episode 1022: reward = -20.00, steps = 933\n",
      "02:10:51 [DEBUG] train episode 1023: reward = -16.00, steps = 1288\n",
      "02:15:28 [DEBUG] train episode 1024: reward = -14.00, steps = 1469\n",
      "02:19:24 [DEBUG] train episode 1025: reward = -16.00, steps = 1248\n",
      "02:25:06 [DEBUG] train episode 1026: reward = -10.00, steps = 1806\n",
      "02:29:36 [DEBUG] train episode 1027: reward = -15.00, steps = 1432\n",
      "02:34:25 [DEBUG] train episode 1028: reward = -13.00, steps = 1529\n",
      "02:38:50 [DEBUG] train episode 1029: reward = -15.00, steps = 1406\n",
      "02:42:30 [DEBUG] train episode 1030: reward = -17.00, steps = 1168\n",
      "02:45:09 [DEBUG] train episode 1031: reward = -20.00, steps = 838\n",
      "02:50:01 [DEBUG] train episode 1032: reward = -13.00, steps = 1544\n",
      "02:54:18 [DEBUG] train episode 1033: reward = -15.00, steps = 1360\n",
      "02:58:54 [DEBUG] train episode 1034: reward = -15.00, steps = 1462\n",
      "03:04:43 [DEBUG] train episode 1035: reward = -10.00, steps = 1849\n",
      "03:09:44 [DEBUG] train episode 1036: reward = -13.00, steps = 1590\n",
      "03:14:57 [DEBUG] train episode 1037: reward = -12.00, steps = 1656\n",
      "03:22:04 [DEBUG] train episode 1038: reward = -6.00, steps = 2197\n",
      "03:29:01 [DEBUG] train episode 1039: reward = -7.00, steps = 2181\n",
      "03:35:19 [DEBUG] train episode 1040: reward = -9.00, steps = 1983\n",
      "03:41:24 [DEBUG] train episode 1041: reward = -11.00, steps = 1922\n",
      "03:45:51 [DEBUG] train episode 1042: reward = -18.00, steps = 1397\n",
      "03:49:18 [DEBUG] train episode 1043: reward = -18.00, steps = 1084\n",
      "03:54:27 [DEBUG] train episode 1044: reward = -14.00, steps = 1620\n",
      "03:59:00 [DEBUG] train episode 1045: reward = -14.00, steps = 1440\n",
      "04:03:29 [DEBUG] train episode 1046: reward = -15.00, steps = 1411\n",
      "04:09:38 [DEBUG] train episode 1047: reward = -9.00, steps = 1936\n",
      "04:16:45 [DEBUG] train episode 1048: reward = -5.00, steps = 2254\n",
      "04:23:26 [DEBUG] train episode 1049: reward = -13.00, steps = 2103\n",
      "04:28:44 [DEBUG] train episode 1050: reward = -13.00, steps = 1640\n",
      "04:34:49 [DEBUG] train episode 1051: reward = -10.00, steps = 1868\n",
      "04:40:28 [DEBUG] train episode 1052: reward = -11.00, steps = 1783\n",
      "04:45:28 [DEBUG] train episode 1053: reward = -14.00, steps = 1577\n",
      "04:51:14 [DEBUG] train episode 1054: reward = -11.00, steps = 1820\n",
      "04:54:39 [DEBUG] train episode 1055: reward = -19.00, steps = 1082\n",
      "04:58:52 [DEBUG] train episode 1056: reward = -17.00, steps = 1324\n",
      "05:02:08 [DEBUG] train episode 1057: reward = -19.00, steps = 1028\n",
      "05:06:59 [DEBUG] train episode 1058: reward = -16.00, steps = 1527\n",
      "05:12:44 [DEBUG] train episode 1059: reward = -13.00, steps = 1810\n",
      "05:21:07 [DEBUG] train episode 1060: reward = -5.00, steps = 2631\n",
      "05:25:17 [DEBUG] train episode 1061: reward = -17.00, steps = 1309\n",
      "05:30:15 [DEBUG] train episode 1062: reward = -14.00, steps = 1552\n",
      "05:36:07 [DEBUG] train episode 1063: reward = -9.00, steps = 1839\n",
      "05:40:04 [DEBUG] train episode 1064: reward = -17.00, steps = 1237\n",
      "05:43:13 [DEBUG] train episode 1065: reward = -20.00, steps = 983\n",
      "05:48:22 [DEBUG] train episode 1066: reward = -13.00, steps = 1614\n",
      "05:54:07 [DEBUG] train episode 1067: reward = -12.00, steps = 1807\n",
      "05:59:49 [DEBUG] train episode 1068: reward = -11.00, steps = 1784\n",
      "06:04:29 [DEBUG] train episode 1069: reward = -15.00, steps = 1459\n",
      "06:10:23 [DEBUG] train episode 1070: reward = -10.00, steps = 1852\n",
      "06:15:16 [DEBUG] train episode 1071: reward = -15.00, steps = 1534\n",
      "06:21:47 [DEBUG] train episode 1072: reward = -11.00, steps = 2030\n",
      "06:28:40 [DEBUG] train episode 1073: reward = -9.00, steps = 2025\n",
      "06:35:37 [DEBUG] train episode 1074: reward = -9.00, steps = 1982\n",
      "06:42:28 [DEBUG] train episode 1075: reward = -8.00, steps = 2094\n",
      "06:50:41 [DEBUG] train episode 1076: reward = 1.00, steps = 2597\n",
      "06:56:07 [DEBUG] train episode 1077: reward = -12.00, steps = 1714\n",
      "07:02:05 [DEBUG] train episode 1078: reward = -10.00, steps = 1892\n",
      "07:07:48 [DEBUG] train episode 1079: reward = -12.00, steps = 1765\n",
      "07:12:55 [DEBUG] train episode 1080: reward = -14.00, steps = 1610\n",
      "07:18:56 [DEBUG] train episode 1081: reward = -11.00, steps = 1892\n",
      "07:27:16 [DEBUG] train episode 1082: reward = -2.00, steps = 2600\n",
      "07:33:07 [DEBUG] train episode 1083: reward = -12.00, steps = 1836\n",
      "07:37:28 [DEBUG] train episode 1084: reward = -19.00, steps = 1375\n",
      "07:44:37 [DEBUG] train episode 1085: reward = -7.00, steps = 2260\n",
      "07:49:28 [DEBUG] train episode 1086: reward = -15.00, steps = 1535\n",
      "07:55:14 [DEBUG] train episode 1087: reward = -19.00, steps = 1820\n",
      "07:59:40 [DEBUG] train episode 1088: reward = -19.00, steps = 1403\n",
      "08:04:01 [DEBUG] train episode 1089: reward = -17.00, steps = 1367\n",
      "08:10:42 [DEBUG] train episode 1090: reward = -7.00, steps = 2106\n",
      "08:17:32 [DEBUG] train episode 1091: reward = -9.00, steps = 2017\n",
      "08:21:45 [DEBUG] train episode 1092: reward = -18.00, steps = 1324\n",
      "08:25:48 [DEBUG] train episode 1093: reward = -18.00, steps = 1280\n",
      "08:30:04 [DEBUG] train episode 1094: reward = -16.00, steps = 1347\n",
      "08:35:28 [DEBUG] train episode 1095: reward = -12.00, steps = 1712\n",
      "08:40:15 [DEBUG] train episode 1096: reward = -15.00, steps = 1517\n",
      "08:44:24 [DEBUG] train episode 1097: reward = -18.00, steps = 1315\n",
      "08:49:59 [DEBUG] train episode 1098: reward = -17.00, steps = 1659\n",
      "08:54:25 [DEBUG] train episode 1099: reward = -18.00, steps = 1309\n",
      "09:00:01 [DEBUG] train episode 1100: reward = -13.00, steps = 1668\n",
      "09:05:04 [DEBUG] train episode 1101: reward = -16.00, steps = 1504\n",
      "09:09:09 [DEBUG] train episode 1102: reward = -18.00, steps = 1243\n",
      "09:14:18 [DEBUG] train episode 1103: reward = -13.00, steps = 1580\n",
      "09:19:01 [DEBUG] train episode 1104: reward = -14.00, steps = 1429\n",
      "09:24:47 [DEBUG] train episode 1105: reward = -12.00, steps = 1755\n",
      "09:33:25 [DEBUG] train episode 1106: reward = 1.00, steps = 2593\n",
      "09:40:00 [DEBUG] train episode 1107: reward = -8.00, steps = 1973\n",
      "09:44:32 [DEBUG] train episode 1108: reward = -15.00, steps = 1368\n",
      "09:49:15 [DEBUG] train episode 1109: reward = -15.00, steps = 1413\n",
      "09:54:05 [DEBUG] train episode 1110: reward = -15.00, steps = 1453\n",
      "09:59:13 [DEBUG] train episode 1111: reward = -15.00, steps = 1543\n",
      "10:07:20 [DEBUG] train episode 1112: reward = -6.00, steps = 2447\n",
      "10:13:01 [DEBUG] train episode 1113: reward = -11.00, steps = 1725\n",
      "10:19:33 [DEBUG] train episode 1114: reward = -9.00, steps = 1976\n",
      "10:23:52 [DEBUG] train episode 1115: reward = -16.00, steps = 1281\n",
      "10:30:05 [DEBUG] train episode 1116: reward = -10.00, steps = 1860\n",
      "10:39:01 [DEBUG] train episode 1117: reward = -4.00, steps = 2687\n",
      "10:44:43 [DEBUG] train episode 1118: reward = -11.00, steps = 1712\n",
      "10:49:27 [DEBUG] train episode 1119: reward = -14.00, steps = 1426\n",
      "10:53:50 [DEBUG] train episode 1120: reward = -15.00, steps = 1325\n",
      "10:57:43 [DEBUG] train episode 1121: reward = -18.00, steps = 1166\n",
      "11:03:35 [DEBUG] train episode 1122: reward = -11.00, steps = 1759\n",
      "11:09:36 [DEBUG] train episode 1123: reward = -12.00, steps = 1811\n",
      "11:15:38 [DEBUG] train episode 1124: reward = -11.00, steps = 1812\n",
      "11:20:39 [DEBUG] train episode 1125: reward = -13.00, steps = 1506\n",
      "11:24:02 [DEBUG] train episode 1126: reward = -19.00, steps = 995\n",
      "11:31:07 [DEBUG] train episode 1127: reward = -9.00, steps = 2114\n",
      "11:35:26 [DEBUG] train episode 1128: reward = -15.00, steps = 1300\n",
      "11:40:08 [DEBUG] train episode 1129: reward = -17.00, steps = 1406\n",
      "11:46:26 [DEBUG] train episode 1130: reward = -9.00, steps = 1893\n",
      "11:51:59 [DEBUG] train episode 1131: reward = -13.00, steps = 1675\n",
      "11:57:55 [DEBUG] train episode 1132: reward = -10.00, steps = 1804\n",
      "12:05:21 [DEBUG] train episode 1133: reward = -6.00, steps = 2267\n",
      "12:10:10 [DEBUG] train episode 1134: reward = -16.00, steps = 1456\n",
      "12:16:21 [DEBUG] train episode 1135: reward = -10.00, steps = 1845\n",
      "12:21:58 [DEBUG] train episode 1136: reward = -13.00, steps = 1671\n",
      "12:28:08 [DEBUG] train episode 1137: reward = -10.00, steps = 1841\n",
      "12:33:01 [DEBUG] train episode 1138: reward = -14.00, steps = 1476\n",
      "12:40:01 [DEBUG] train episode 1139: reward = -7.00, steps = 2121\n",
      "12:44:44 [DEBUG] train episode 1140: reward = -15.00, steps = 1423\n",
      "12:51:47 [DEBUG] train episode 1141: reward = -6.00, steps = 2137\n",
      "12:57:38 [DEBUG] train episode 1142: reward = -10.00, steps = 1772\n",
      "13:03:35 [DEBUG] train episode 1143: reward = -11.00, steps = 1795\n",
      "13:09:03 [DEBUG] train episode 1144: reward = -12.00, steps = 1650\n",
      "13:14:57 [DEBUG] train episode 1145: reward = -11.00, steps = 1778\n",
      "13:20:52 [DEBUG] train episode 1146: reward = -9.00, steps = 1786\n",
      "13:28:26 [DEBUG] train episode 1147: reward = -6.00, steps = 2270\n",
      "13:34:19 [DEBUG] train episode 1148: reward = -10.00, steps = 1774\n",
      "13:41:15 [DEBUG] train episode 1149: reward = -9.00, steps = 2076\n",
      "13:45:50 [DEBUG] train episode 1150: reward = -14.00, steps = 1379\n",
      "13:49:02 [DEBUG] train episode 1151: reward = -19.00, steps = 936\n",
      "13:55:48 [DEBUG] train episode 1152: reward = -8.00, steps = 2004\n",
      "14:02:34 [DEBUG] train episode 1153: reward = -9.00, steps = 2010\n",
      "14:07:36 [DEBUG] train episode 1154: reward = -15.00, steps = 1497\n",
      "14:16:14 [DEBUG] train episode 1155: reward = -7.00, steps = 2572\n",
      "14:20:11 [DEBUG] train episode 1156: reward = -18.00, steps = 1195\n",
      "14:25:25 [DEBUG] train episode 1157: reward = -13.00, steps = 1560\n",
      "14:32:16 [DEBUG] train episode 1158: reward = -10.00, steps = 2052\n",
      "14:38:04 [DEBUG] train episode 1159: reward = -12.00, steps = 1738\n",
      "14:44:11 [DEBUG] train episode 1160: reward = -11.00, steps = 1822\n",
      "14:50:03 [DEBUG] train episode 1161: reward = -12.00, steps = 1727\n",
      "14:56:18 [DEBUG] train episode 1162: reward = -10.00, steps = 1870\n",
      "15:01:39 [DEBUG] train episode 1163: reward = -15.00, steps = 1602\n",
      "15:08:00 [DEBUG] train episode 1164: reward = -11.00, steps = 1897\n",
      "15:14:28 [DEBUG] train episode 1165: reward = -9.00, steps = 1939\n",
      "15:19:17 [DEBUG] train episode 1166: reward = -14.00, steps = 1450\n",
      "15:25:37 [DEBUG] train episode 1167: reward = -9.00, steps = 1888\n",
      "15:30:50 [DEBUG] train episode 1168: reward = -13.00, steps = 1561\n",
      "15:36:34 [DEBUG] train episode 1169: reward = -12.00, steps = 1719\n",
      "15:41:40 [DEBUG] train episode 1170: reward = -14.00, steps = 1512\n",
      "15:46:54 [DEBUG] train episode 1171: reward = -14.00, steps = 1552\n",
      "15:53:34 [DEBUG] train episode 1172: reward = -11.00, steps = 1826\n",
      "15:59:32 [DEBUG] train episode 1173: reward = -12.00, steps = 1584\n",
      "16:04:36 [DEBUG] train episode 1174: reward = -17.00, steps = 1393\n",
      "16:09:22 [DEBUG] train episode 1175: reward = -16.00, steps = 1333\n",
      "16:14:22 [DEBUG] train episode 1176: reward = -15.00, steps = 1439\n",
      "16:20:10 [DEBUG] train episode 1177: reward = -13.00, steps = 1625\n",
      "16:26:41 [DEBUG] train episode 1178: reward = -11.00, steps = 1823\n",
      "16:32:33 [DEBUG] train episode 1179: reward = -12.00, steps = 1689\n",
      "16:38:09 [DEBUG] train episode 1180: reward = -15.00, steps = 1643\n",
      "16:41:22 [DEBUG] train episode 1181: reward = -20.00, steps = 960\n",
      "16:46:30 [DEBUG] train episode 1182: reward = -15.00, steps = 1515\n",
      "16:49:50 [DEBUG] train episode 1183: reward = -20.00, steps = 984\n",
      "16:53:42 [DEBUG] train episode 1184: reward = -17.00, steps = 1153\n",
      "16:58:23 [DEBUG] train episode 1185: reward = -15.00, steps = 1377\n",
      "17:03:00 [DEBUG] train episode 1186: reward = -15.00, steps = 1355\n",
      "17:09:43 [DEBUG] train episode 1187: reward = -10.00, steps = 1973\n",
      "17:14:42 [DEBUG] train episode 1188: reward = -14.00, steps = 1475\n",
      "17:19:36 [DEBUG] train episode 1189: reward = -14.00, steps = 1466\n",
      "17:24:56 [DEBUG] train episode 1190: reward = -13.00, steps = 1569\n",
      "17:31:16 [DEBUG] train episode 1191: reward = -11.00, steps = 1887\n",
      "17:36:44 [DEBUG] train episode 1192: reward = -13.00, steps = 1627\n",
      "17:42:06 [DEBUG] train episode 1193: reward = -14.00, steps = 1585\n",
      "17:47:01 [DEBUG] train episode 1194: reward = -14.00, steps = 1442\n",
      "17:52:46 [DEBUG] train episode 1195: reward = -13.00, steps = 1694\n",
      "17:56:26 [DEBUG] train episode 1196: reward = -19.00, steps = 1078\n",
      "18:01:44 [DEBUG] train episode 1197: reward = -14.00, steps = 1546\n",
      "18:05:47 [DEBUG] train episode 1198: reward = -17.00, steps = 1173\n",
      "18:10:41 [DEBUG] train episode 1199: reward = -15.00, steps = 1425\n",
      "18:16:45 [DEBUG] train episode 1200: reward = -11.00, steps = 1821\n",
      "18:22:07 [DEBUG] train episode 1201: reward = -12.00, steps = 1597\n",
      "18:25:46 [DEBUG] train episode 1202: reward = -18.00, steps = 1087\n",
      "18:30:09 [DEBUG] train episode 1203: reward = -16.00, steps = 1299\n",
      "18:34:54 [DEBUG] train episode 1204: reward = -14.00, steps = 1409\n",
      "18:40:16 [DEBUG] train episode 1205: reward = -12.00, steps = 1596\n",
      "18:44:49 [DEBUG] train episode 1206: reward = -15.00, steps = 1354\n",
      "18:48:53 [DEBUG] train episode 1207: reward = -16.00, steps = 1222\n",
      "18:53:33 [DEBUG] train episode 1208: reward = -16.00, steps = 1399\n",
      "18:58:44 [DEBUG] train episode 1209: reward = -14.00, steps = 1553\n",
      "19:01:48 [DEBUG] train episode 1210: reward = -20.00, steps = 919\n",
      "19:04:54 [DEBUG] train episode 1211: reward = -19.00, steps = 934\n",
      "19:09:30 [DEBUG] train episode 1212: reward = -14.00, steps = 1385\n",
      "19:14:14 [DEBUG] train episode 1213: reward = -16.00, steps = 1421\n",
      "19:19:19 [DEBUG] train episode 1214: reward = -13.00, steps = 1535\n",
      "19:23:18 [DEBUG] train episode 1215: reward = -17.00, steps = 1182\n",
      "19:29:37 [DEBUG] train episode 1216: reward = -13.00, steps = 1665\n",
      "19:36:50 [DEBUG] train episode 1217: reward = -11.00, steps = 1751\n",
      "19:41:00 [DEBUG] train episode 1218: reward = -18.00, steps = 1146\n",
      "19:45:57 [DEBUG] train episode 1219: reward = -20.00, steps = 1097\n",
      "19:51:34 [DEBUG] train episode 1220: reward = -19.00, steps = 1011\n",
      "19:58:15 [DEBUG] train episode 1221: reward = -19.00, steps = 1225\n",
      "20:07:01 [DEBUG] train episode 1222: reward = -10.00, steps = 1802\n",
      "20:10:14 [DEBUG] train episode 1223: reward = -19.00, steps = 937\n",
      "20:14:21 [DEBUG] train episode 1224: reward = -17.00, steps = 1196\n",
      "20:19:16 [DEBUG] train episode 1225: reward = -15.00, steps = 1433\n",
      "20:26:12 [DEBUG] train episode 1226: reward = -9.00, steps = 2006\n",
      "20:31:58 [DEBUG] train episode 1227: reward = -12.00, steps = 1681\n",
      "20:39:26 [DEBUG] train episode 1228: reward = -8.00, steps = 2181\n",
      "20:43:34 [DEBUG] train episode 1229: reward = -17.00, steps = 1200\n",
      "20:47:16 [DEBUG] train episode 1230: reward = -19.00, steps = 1083\n",
      "20:51:52 [DEBUG] train episode 1231: reward = -19.00, steps = 1336\n",
      "20:59:03 [DEBUG] train episode 1232: reward = -8.00, steps = 2090\n",
      "21:03:16 [DEBUG] train episode 1233: reward = -17.00, steps = 1228\n",
      "21:06:30 [DEBUG] train episode 1234: reward = -19.00, steps = 931\n",
      "21:10:22 [DEBUG] train episode 1235: reward = -18.00, steps = 1084\n",
      "21:17:01 [DEBUG] train episode 1236: reward = -10.00, steps = 1834\n",
      "21:22:15 [DEBUG] train episode 1237: reward = -16.00, steps = 1259\n",
      "21:29:18 [DEBUG] train episode 1238: reward = -12.00, steps = 1590\n",
      "21:35:31 [DEBUG] train episode 1239: reward = -17.00, steps = 1157\n",
      "21:41:02 [DEBUG] train episode 1240: reward = -18.00, steps = 1187\n",
      "21:46:38 [DEBUG] train episode 1241: reward = -12.00, steps = 1576\n",
      "21:50:21 [DEBUG] train episode 1242: reward = -19.00, steps = 1051\n",
      "21:53:05 [DEBUG] train episode 1243: reward = -21.00, steps = 776\n",
      "21:56:57 [DEBUG] train episode 1244: reward = -18.00, steps = 1064\n",
      "22:05:43 [DEBUG] train episode 1245: reward = -4.00, steps = 2489\n",
      "22:09:44 [DEBUG] train episode 1246: reward = -17.00, steps = 1175\n",
      "22:13:09 [DEBUG] train episode 1247: reward = -20.00, steps = 1000\n",
      "22:17:29 [DEBUG] train episode 1248: reward = -16.00, steps = 1276\n",
      "22:20:31 [DEBUG] train episode 1249: reward = -20.00, steps = 906\n",
      "22:24:00 [DEBUG] train episode 1250: reward = -19.00, steps = 1033\n",
      "22:30:22 [DEBUG] train episode 1251: reward = -12.00, steps = 1800\n",
      "22:34:23 [DEBUG] train episode 1252: reward = -19.00, steps = 1055\n",
      "22:39:14 [DEBUG] train episode 1253: reward = -16.00, steps = 1307\n",
      "22:42:37 [DEBUG] train episode 1254: reward = -19.00, steps = 959\n",
      "22:48:43 [DEBUG] train episode 1255: reward = -13.00, steps = 1648\n",
      "22:52:13 [DEBUG] train episode 1256: reward = -19.00, steps = 969\n",
      "22:58:07 [DEBUG] train episode 1257: reward = -12.00, steps = 1637\n",
      "23:02:45 [DEBUG] train episode 1258: reward = -18.00, steps = 1126\n",
      "23:08:58 [DEBUG] train episode 1259: reward = -18.00, steps = 1155\n",
      "23:16:59 [DEBUG] train episode 1260: reward = -10.00, steps = 1818\n",
      "23:24:58 [DEBUG] train episode 1261: reward = -8.00, steps = 2238\n",
      "23:28:51 [DEBUG] train episode 1262: reward = -18.00, steps = 1063\n",
      "23:33:05 [DEBUG] train episode 1263: reward = -17.00, steps = 1252\n",
      "23:37:27 [DEBUG] train episode 1264: reward = -18.00, steps = 1232\n",
      "23:43:29 [DEBUG] train episode 1265: reward = -11.00, steps = 1723\n",
      "23:51:06 [DEBUG] train episode 1266: reward = -8.00, steps = 2085\n",
      "23:57:53 [DEBUG] train episode 1267: reward = -12.00, steps = 1876\n",
      "00:01:44 [DEBUG] train episode 1268: reward = -18.00, steps = 1110\n",
      "00:07:00 [DEBUG] train episode 1269: reward = -14.00, steps = 1420\n",
      "00:13:37 [DEBUG] train episode 1270: reward = -9.00, steps = 2000\n",
      "00:17:29 [DEBUG] train episode 1271: reward = -17.00, steps = 1170\n",
      "00:22:43 [DEBUG] train episode 1272: reward = -14.00, steps = 1552\n",
      "00:27:01 [DEBUG] train episode 1273: reward = -15.00, steps = 1307\n",
      "00:33:41 [DEBUG] train episode 1274: reward = -11.00, steps = 2020\n",
      "00:38:37 [DEBUG] train episode 1275: reward = -14.00, steps = 1505\n",
      "00:45:26 [DEBUG] train episode 1276: reward = -7.00, steps = 2069\n",
      "00:50:05 [DEBUG] train episode 1277: reward = -15.00, steps = 1417\n",
      "00:53:14 [DEBUG] train episode 1278: reward = -20.00, steps = 963\n",
      "00:57:14 [DEBUG] train episode 1279: reward = -17.00, steps = 1220\n",
      "01:03:37 [DEBUG] train episode 1280: reward = -9.00, steps = 1947\n",
      "01:07:30 [DEBUG] train episode 1281: reward = -17.00, steps = 1177\n",
      "01:11:29 [DEBUG] train episode 1282: reward = -16.00, steps = 1217\n",
      "01:16:36 [DEBUG] train episode 1283: reward = -13.00, steps = 1557\n",
      "01:22:01 [DEBUG] train episode 1284: reward = -12.00, steps = 1641\n",
      "01:27:11 [DEBUG] train episode 1285: reward = -14.00, steps = 1577\n",
      "01:32:03 [DEBUG] train episode 1286: reward = -14.00, steps = 1478\n",
      "01:36:51 [DEBUG] train episode 1287: reward = -14.00, steps = 1464\n",
      "01:39:46 [DEBUG] train episode 1288: reward = -20.00, steps = 886\n",
      "01:45:02 [DEBUG] train episode 1289: reward = -15.00, steps = 1607\n",
      "01:51:02 [DEBUG] train episode 1290: reward = -10.00, steps = 1824\n",
      "01:56:21 [DEBUG] train episode 1291: reward = -12.00, steps = 1619\n",
      "02:00:24 [DEBUG] train episode 1292: reward = -17.00, steps = 1226\n",
      "02:05:03 [DEBUG] train episode 1293: reward = -14.00, steps = 1416\n",
      "02:11:16 [DEBUG] train episode 1294: reward = -10.00, steps = 1891\n",
      "02:15:07 [DEBUG] train episode 1295: reward = -17.00, steps = 1183\n",
      "02:18:53 [DEBUG] train episode 1296: reward = -17.00, steps = 1154\n",
      "02:24:18 [DEBUG] train episode 1297: reward = -12.00, steps = 1651\n",
      "02:28:54 [DEBUG] train episode 1298: reward = -14.00, steps = 1409\n",
      "02:33:54 [DEBUG] train episode 1299: reward = -13.00, steps = 1534\n",
      "02:39:11 [DEBUG] train episode 1300: reward = -13.00, steps = 1617\n",
      "02:44:26 [DEBUG] train episode 1301: reward = -12.00, steps = 1616\n",
      "02:50:26 [DEBUG] train episode 1302: reward = -10.00, steps = 1840\n",
      "02:54:04 [DEBUG] train episode 1303: reward = -18.00, steps = 1113\n",
      "02:58:25 [DEBUG] train episode 1304: reward = -16.00, steps = 1336\n",
      "03:02:24 [DEBUG] train episode 1305: reward = -16.00, steps = 1220\n",
      "03:07:00 [DEBUG] train episode 1306: reward = -17.00, steps = 1410\n",
      "03:12:19 [DEBUG] train episode 1307: reward = -12.00, steps = 1624\n",
      "03:18:05 [DEBUG] train episode 1308: reward = -12.00, steps = 1766\n",
      "03:25:31 [DEBUG] train episode 1309: reward = -7.00, steps = 2274\n",
      "03:30:46 [DEBUG] train episode 1310: reward = -12.00, steps = 1605\n",
      "03:36:32 [DEBUG] train episode 1311: reward = -11.00, steps = 1764\n",
      "03:40:40 [DEBUG] train episode 1312: reward = -17.00, steps = 1263\n",
      "03:47:14 [DEBUG] train episode 1313: reward = -9.00, steps = 2012\n",
      "03:53:26 [DEBUG] train episode 1314: reward = -10.00, steps = 1857\n",
      "03:58:50 [DEBUG] train episode 1315: reward = -12.00, steps = 1647\n",
      "04:04:15 [DEBUG] train episode 1316: reward = -14.00, steps = 1651\n",
      "04:07:37 [DEBUG] train episode 1317: reward = -19.00, steps = 1015\n",
      "04:12:33 [DEBUG] train episode 1318: reward = -16.00, steps = 1502\n",
      "04:16:06 [DEBUG] train episode 1319: reward = -18.00, steps = 1083\n",
      "04:22:04 [DEBUG] train episode 1320: reward = -10.00, steps = 1815\n",
      "04:29:08 [DEBUG] train episode 1321: reward = -8.00, steps = 2144\n",
      "04:37:16 [DEBUG] train episode 1322: reward = -3.00, steps = 2477\n",
      "04:41:26 [DEBUG] train episode 1323: reward = -17.00, steps = 1264\n",
      "04:46:36 [DEBUG] train episode 1324: reward = -15.00, steps = 1576\n",
      "04:52:36 [DEBUG] train episode 1325: reward = -10.00, steps = 1826\n",
      "04:58:04 [DEBUG] train episode 1326: reward = -12.00, steps = 1665\n",
      "05:02:52 [DEBUG] train episode 1327: reward = -16.00, steps = 1461\n",
      "05:06:39 [DEBUG] train episode 1328: reward = -17.00, steps = 1151\n",
      "05:10:33 [DEBUG] train episode 1329: reward = -16.00, steps = 1184\n",
      "05:15:23 [DEBUG] train episode 1330: reward = -16.00, steps = 1471\n",
      "05:20:05 [DEBUG] train episode 1331: reward = -14.00, steps = 1434\n",
      "05:24:39 [DEBUG] train episode 1332: reward = -15.00, steps = 1373\n",
      "05:28:33 [DEBUG] train episode 1333: reward = -17.00, steps = 1188\n",
      "05:32:29 [DEBUG] train episode 1334: reward = -17.00, steps = 1191\n",
      "05:39:31 [DEBUG] train episode 1335: reward = -10.00, steps = 2142\n",
      "05:45:41 [DEBUG] train episode 1336: reward = -10.00, steps = 1882\n",
      "05:51:34 [DEBUG] train episode 1337: reward = -10.00, steps = 1794\n",
      "05:56:57 [DEBUG] train episode 1338: reward = -11.00, steps = 1635\n",
      "06:00:21 [DEBUG] train episode 1339: reward = -18.00, steps = 1030\n",
      "06:07:12 [DEBUG] train episode 1340: reward = -9.00, steps = 2078\n",
      "06:11:09 [DEBUG] train episode 1341: reward = -17.00, steps = 1201\n",
      "06:17:33 [DEBUG] train episode 1342: reward = -12.00, steps = 1945\n",
      "06:22:13 [DEBUG] train episode 1343: reward = -14.00, steps = 1407\n",
      "06:28:19 [DEBUG] train episode 1344: reward = -9.00, steps = 1856\n",
      "06:33:01 [DEBUG] train episode 1345: reward = -15.00, steps = 1423\n",
      "06:38:42 [DEBUG] train episode 1346: reward = -12.00, steps = 1724\n",
      "06:42:22 [DEBUG] train episode 1347: reward = -18.00, steps = 1110\n",
      "06:46:09 [DEBUG] train episode 1348: reward = -18.00, steps = 1152\n",
      "06:51:44 [DEBUG] train episode 1349: reward = -11.00, steps = 1695\n",
      "06:58:08 [DEBUG] train episode 1350: reward = -10.00, steps = 1942\n",
      "07:02:52 [DEBUG] train episode 1351: reward = -14.00, steps = 1437\n",
      "07:07:40 [DEBUG] train episode 1352: reward = -15.00, steps = 1457\n",
      "07:13:01 [DEBUG] train episode 1353: reward = -14.00, steps = 1633\n",
      "07:16:55 [DEBUG] train episode 1354: reward = -17.00, steps = 1186\n",
      "07:22:22 [DEBUG] train episode 1355: reward = -13.00, steps = 1644\n",
      "07:25:26 [DEBUG] train episode 1356: reward = -19.00, steps = 933\n",
      "07:29:59 [DEBUG] train episode 1357: reward = -15.00, steps = 1381\n",
      "07:32:44 [DEBUG] train episode 1358: reward = -20.00, steps = 833\n",
      "07:36:29 [DEBUG] train episode 1359: reward = -18.00, steps = 1138\n",
      "07:41:21 [DEBUG] train episode 1360: reward = -15.00, steps = 1480\n",
      "07:47:30 [DEBUG] train episode 1361: reward = -12.00, steps = 1826\n",
      "07:53:42 [DEBUG] train episode 1362: reward = -12.00, steps = 1872\n",
      "07:58:38 [DEBUG] train episode 1363: reward = -14.00, steps = 1470\n",
      "08:05:19 [DEBUG] train episode 1364: reward = -10.00, steps = 2001\n",
      "08:09:08 [DEBUG] train episode 1365: reward = -17.00, steps = 1141\n",
      "08:14:26 [DEBUG] train episode 1366: reward = -13.00, steps = 1583\n",
      "08:19:36 [DEBUG] train episode 1367: reward = -13.00, steps = 1549\n",
      "08:25:22 [DEBUG] train episode 1368: reward = -12.00, steps = 1714\n",
      "08:31:19 [DEBUG] train episode 1369: reward = -12.00, steps = 1788\n",
      "08:38:13 [DEBUG] train episode 1370: reward = -8.00, steps = 2077\n",
      "08:43:09 [DEBUG] train episode 1371: reward = -14.00, steps = 1398\n",
      "08:46:50 [DEBUG] train episode 1372: reward = -19.00, steps = 935\n",
      "08:51:48 [DEBUG] train episode 1373: reward = -14.00, steps = 1470\n",
      "08:58:08 [DEBUG] train episode 1374: reward = -9.00, steps = 1907\n",
      "09:01:29 [DEBUG] train episode 1375: reward = -20.00, steps = 1007\n",
      "09:07:55 [DEBUG] train episode 1376: reward = -10.00, steps = 1941\n",
      "09:12:33 [DEBUG] train episode 1377: reward = -15.00, steps = 1403\n",
      "09:16:30 [DEBUG] train episode 1378: reward = -17.00, steps = 1195\n",
      "09:21:47 [DEBUG] train episode 1379: reward = -12.00, steps = 1587\n",
      "09:27:23 [DEBUG] train episode 1380: reward = -12.00, steps = 1690\n",
      "09:33:28 [DEBUG] train episode 1381: reward = -10.00, steps = 1828\n",
      "09:38:27 [DEBUG] train episode 1382: reward = -15.00, steps = 1489\n",
      "09:42:57 [DEBUG] train episode 1383: reward = -15.00, steps = 1346\n",
      "09:46:49 [DEBUG] train episode 1384: reward = -17.00, steps = 1159\n",
      "09:51:48 [DEBUG] train episode 1385: reward = -13.00, steps = 1505\n",
      "09:57:20 [DEBUG] train episode 1386: reward = -12.00, steps = 1664\n",
      "10:00:34 [DEBUG] train episode 1387: reward = -19.00, steps = 958\n",
      "10:07:36 [DEBUG] train episode 1388: reward = -8.00, steps = 2097\n",
      "10:13:25 [DEBUG] train episode 1389: reward = -12.00, steps = 1736\n",
      "10:17:46 [DEBUG] train episode 1390: reward = -16.00, steps = 1289\n",
      "10:24:04 [DEBUG] train episode 1391: reward = -10.00, steps = 1874\n",
      "10:28:31 [DEBUG] train episode 1392: reward = -16.00, steps = 1337\n",
      "10:34:31 [DEBUG] train episode 1393: reward = -12.00, steps = 1794\n",
      "10:39:23 [DEBUG] train episode 1394: reward = -14.00, steps = 1436\n",
      "10:46:55 [DEBUG] train episode 1395: reward = -8.00, steps = 2237\n",
      "10:52:43 [DEBUG] train episode 1396: reward = -11.00, steps = 1737\n",
      "10:55:48 [DEBUG] train episode 1397: reward = -20.00, steps = 915\n",
      "11:00:07 [DEBUG] train episode 1398: reward = -16.00, steps = 1281\n",
      "11:06:17 [DEBUG] train episode 1399: reward = -10.00, steps = 1838\n",
      "11:09:42 [DEBUG] train episode 1400: reward = -19.00, steps = 1020\n",
      "11:13:22 [DEBUG] train episode 1401: reward = -20.00, steps = 1094\n",
      "11:18:04 [DEBUG] train episode 1402: reward = -14.00, steps = 1407\n",
      "11:23:39 [DEBUG] train episode 1403: reward = -12.00, steps = 1667\n",
      "11:28:27 [DEBUG] train episode 1404: reward = -15.00, steps = 1421\n",
      "11:35:00 [DEBUG] train episode 1405: reward = -10.00, steps = 1834\n",
      "11:40:51 [DEBUG] train episode 1406: reward = -13.00, steps = 1618\n",
      "11:47:14 [DEBUG] train episode 1407: reward = -9.00, steps = 1881\n",
      "11:53:12 [DEBUG] train episode 1408: reward = -11.00, steps = 1694\n",
      "11:57:04 [DEBUG] train episode 1409: reward = -18.00, steps = 1115\n",
      "12:04:48 [DEBUG] train episode 1410: reward = -8.00, steps = 2221\n",
      "12:11:14 [DEBUG] train episode 1411: reward = -10.00, steps = 1897\n",
      "12:16:48 [DEBUG] train episode 1412: reward = -12.00, steps = 1639\n",
      "12:22:52 [DEBUG] train episode 1413: reward = -12.00, steps = 1776\n",
      "12:27:21 [DEBUG] train episode 1414: reward = -15.00, steps = 1316\n",
      "12:32:49 [DEBUG] train episode 1415: reward = -13.00, steps = 1613\n",
      "12:36:00 [DEBUG] train episode 1416: reward = -19.00, steps = 935\n",
      "12:40:57 [DEBUG] train episode 1417: reward = -14.00, steps = 1467\n",
      "12:49:43 [DEBUG] train episode 1418: reward = -6.00, steps = 2526\n",
      "12:55:25 [DEBUG] train episode 1419: reward = -16.00, steps = 1576\n",
      "13:00:54 [DEBUG] train episode 1420: reward = -14.00, steps = 1583\n",
      "13:06:25 [DEBUG] train episode 1421: reward = -14.00, steps = 1554\n",
      "13:10:10 [DEBUG] train episode 1422: reward = -19.00, steps = 1054\n",
      "13:13:56 [DEBUG] train episode 1423: reward = -19.00, steps = 1073\n",
      "13:18:53 [DEBUG] train episode 1424: reward = -15.00, steps = 1404\n",
      "13:24:04 [DEBUG] train episode 1425: reward = -13.00, steps = 1501\n",
      "13:30:28 [DEBUG] train episode 1426: reward = -11.00, steps = 1840\n",
      "13:40:31 [DEBUG] train episode 1427: reward = 1.00, steps = 2718\n",
      "13:46:39 [DEBUG] train episode 1428: reward = -12.00, steps = 1565\n",
      "13:53:59 [DEBUG] train episode 1429: reward = -11.00, steps = 1873\n",
      "13:58:51 [DEBUG] train episode 1430: reward = -16.00, steps = 1249\n",
      "14:03:44 [DEBUG] train episode 1431: reward = -16.00, steps = 1246\n",
      "14:07:22 [DEBUG] train episode 1432: reward = -19.00, steps = 935\n",
      "14:13:31 [DEBUG] train episode 1433: reward = -13.00, steps = 1579\n",
      "14:18:53 [DEBUG] train episode 1434: reward = -14.00, steps = 1380\n",
      "14:24:34 [DEBUG] train episode 1435: reward = -15.00, steps = 1460\n",
      "14:29:45 [DEBUG] train episode 1436: reward = -15.00, steps = 1314\n",
      "14:33:32 [DEBUG] train episode 1437: reward = -18.00, steps = 1097\n",
      "14:38:26 [DEBUG] train episode 1438: reward = -15.00, steps = 1428\n",
      "14:42:54 [DEBUG] train episode 1439: reward = -16.00, steps = 1326\n",
      "14:47:41 [DEBUG] train episode 1440: reward = -15.00, steps = 1416\n",
      "14:52:04 [DEBUG] train episode 1441: reward = -17.00, steps = 1249\n",
      "14:57:57 [DEBUG] train episode 1442: reward = -10.00, steps = 1748\n",
      "15:05:34 [DEBUG] train episode 1443: reward = -10.00, steps = 2040\n",
      "15:09:38 [DEBUG] train episode 1444: reward = -18.00, steps = 1044\n",
      "15:13:40 [DEBUG] train episode 1445: reward = -18.00, steps = 1027\n",
      "15:17:52 [DEBUG] train episode 1446: reward = -16.00, steps = 1219\n",
      "15:21:57 [DEBUG] train episode 1447: reward = -17.00, steps = 1164\n",
      "15:27:42 [DEBUG] train episode 1448: reward = -13.00, steps = 1629\n",
      "15:33:53 [DEBUG] train episode 1449: reward = -12.00, steps = 1792\n",
      "15:39:32 [DEBUG] train episode 1450: reward = -12.00, steps = 1637\n",
      "15:44:00 [DEBUG] train episode 1451: reward = -16.00, steps = 1282\n",
      "15:47:14 [DEBUG] train episode 1452: reward = -19.00, steps = 938\n",
      "15:52:18 [DEBUG] train episode 1453: reward = -14.00, steps = 1465\n",
      "15:58:56 [DEBUG] train episode 1454: reward = -10.00, steps = 1937\n",
      "16:04:25 [DEBUG] train episode 1455: reward = -12.00, steps = 1602\n",
      "16:10:02 [DEBUG] train episode 1456: reward = -13.00, steps = 1583\n",
      "16:15:20 [DEBUG] train episode 1457: reward = -16.00, steps = 1472\n",
      "16:19:51 [DEBUG] train episode 1458: reward = -16.00, steps = 1260\n",
      "16:27:44 [DEBUG] train episode 1459: reward = -10.00, steps = 1972\n",
      "16:32:57 [DEBUG] train episode 1460: reward = -15.00, steps = 1325\n",
      "16:37:54 [DEBUG] train episode 1461: reward = -17.00, steps = 1266\n",
      "16:46:55 [DEBUG] train episode 1462: reward = -9.00, steps = 2297\n",
      "16:51:48 [DEBUG] train episode 1463: reward = -17.00, steps = 1240\n",
      "16:57:32 [DEBUG] train episode 1464: reward = -14.00, steps = 1461\n",
      "17:04:08 [DEBUG] train episode 1465: reward = -13.00, steps = 1680\n",
      "17:08:52 [DEBUG] train episode 1466: reward = -16.00, steps = 1280\n",
      "17:16:14 [DEBUG] train episode 1467: reward = -7.00, steps = 2167\n",
      "17:21:11 [DEBUG] train episode 1468: reward = -14.00, steps = 1420\n",
      "17:25:37 [DEBUG] train episode 1469: reward = -16.00, steps = 1276\n",
      "17:31:31 [DEBUG] train episode 1470: reward = -13.00, steps = 1720\n",
      "17:36:36 [DEBUG] train episode 1471: reward = -13.00, steps = 1486\n",
      "17:44:16 [DEBUG] train episode 1472: reward = -8.00, steps = 2258\n",
      "17:49:36 [DEBUG] train episode 1473: reward = -15.00, steps = 1587\n",
      "17:54:35 [DEBUG] train episode 1474: reward = -14.00, steps = 1479\n",
      "18:00:17 [DEBUG] train episode 1475: reward = -12.00, steps = 1681\n",
      "18:07:58 [DEBUG] train episode 1476: reward = -8.00, steps = 2260\n",
      "18:12:58 [DEBUG] train episode 1477: reward = -15.00, steps = 1476\n",
      "18:18:07 [DEBUG] train episode 1478: reward = -15.00, steps = 1526\n",
      "18:25:11 [DEBUG] train episode 1479: reward = -8.00, steps = 2093\n",
      "18:30:20 [DEBUG] train episode 1480: reward = -13.00, steps = 1513\n",
      "18:34:51 [DEBUG] train episode 1481: reward = -15.00, steps = 1326\n",
      "18:40:33 [DEBUG] train episode 1482: reward = -13.00, steps = 1694\n",
      "18:45:58 [DEBUG] train episode 1483: reward = -14.00, steps = 1615\n",
      "18:50:37 [DEBUG] train episode 1484: reward = -15.00, steps = 1361\n",
      "18:56:29 [DEBUG] train episode 1485: reward = -13.00, steps = 1679\n",
      "19:00:55 [DEBUG] train episode 1486: reward = -17.00, steps = 1252\n",
      "19:06:06 [DEBUG] train episode 1487: reward = -14.00, steps = 1462\n",
      "19:10:40 [DEBUG] train episode 1488: reward = -17.00, steps = 1318\n",
      "19:13:58 [DEBUG] train episode 1489: reward = -19.00, steps = 964\n",
      "19:19:32 [DEBUG] train episode 1490: reward = -12.00, steps = 1622\n",
      "19:22:31 [DEBUG] train episode 1491: reward = -20.00, steps = 874\n",
      "19:27:50 [DEBUG] train episode 1492: reward = -13.00, steps = 1567\n",
      "19:35:48 [DEBUG] train episode 1493: reward = -4.00, steps = 2336\n",
      "19:40:41 [DEBUG] train episode 1494: reward = -15.00, steps = 1453\n",
      "19:46:32 [DEBUG] train episode 1495: reward = -14.00, steps = 1742\n",
      "19:53:05 [DEBUG] train episode 1496: reward = -10.00, steps = 1869\n",
      "19:57:12 [DEBUG] train episode 1497: reward = -18.00, steps = 1185\n",
      "20:02:55 [DEBUG] train episode 1498: reward = -14.00, steps = 1557\n",
      "20:08:07 [DEBUG] train episode 1499: reward = -16.00, steps = 1485\n",
      "20:12:19 [DEBUG] train episode 1500: reward = -17.00, steps = 1179\n",
      "20:18:17 [DEBUG] train episode 1501: reward = -13.00, steps = 1619\n",
      "20:24:47 [DEBUG] train episode 1502: reward = -12.00, steps = 1603\n",
      "20:29:53 [DEBUG] train episode 1503: reward = -15.00, steps = 1415\n",
      "20:36:17 [DEBUG] train episode 1504: reward = -11.00, steps = 1801\n",
      "20:43:36 [DEBUG] train episode 1505: reward = -6.00, steps = 2051\n",
      "20:50:02 [DEBUG] train episode 1506: reward = -13.00, steps = 1744\n",
      "20:59:25 [DEBUG] train episode 1507: reward = 1.00, steps = 2609\n",
      "21:04:57 [DEBUG] train episode 1508: reward = -14.00, steps = 1558\n",
      "21:11:02 [DEBUG] train episode 1509: reward = -12.00, steps = 1732\n",
      "21:20:37 [DEBUG] train episode 1510: reward = 1.00, steps = 2765\n",
      "21:26:53 [DEBUG] train episode 1511: reward = -12.00, steps = 1741\n",
      "21:35:46 [DEBUG] train episode 1512: reward = -2.00, steps = 2441\n",
      "21:40:13 [DEBUG] train episode 1513: reward = -17.00, steps = 1278\n",
      "21:46:55 [DEBUG] train episode 1514: reward = -10.00, steps = 1890\n",
      "21:53:14 [DEBUG] train episode 1515: reward = -12.00, steps = 1823\n",
      "21:59:45 [DEBUG] train episode 1516: reward = -10.00, steps = 1862\n",
      "22:05:36 [DEBUG] train episode 1517: reward = -12.00, steps = 1664\n",
      "22:09:07 [DEBUG] train episode 1518: reward = -19.00, steps = 992\n",
      "22:16:14 [DEBUG] train episode 1519: reward = -9.00, steps = 1945\n",
      "22:23:49 [DEBUG] train episode 1520: reward = -8.00, steps = 2038\n",
      "22:28:22 [DEBUG] train episode 1521: reward = -16.00, steps = 1255\n",
      "22:34:31 [DEBUG] train episode 1522: reward = -12.00, steps = 1686\n",
      "22:41:33 [DEBUG] train episode 1523: reward = -10.00, steps = 1885\n",
      "22:51:15 [DEBUG] train episode 1524: reward = -5.00, steps = 2640\n",
      "22:56:52 [DEBUG] train episode 1525: reward = -16.00, steps = 1374\n",
      "23:03:14 [DEBUG] train episode 1526: reward = -13.00, steps = 1713\n",
      "23:09:11 [DEBUG] train episode 1527: reward = -15.00, steps = 1483\n",
      "23:13:02 [DEBUG] train episode 1528: reward = -20.00, steps = 898\n",
      "23:20:48 [DEBUG] train episode 1529: reward = -9.00, steps = 1988\n",
      "23:27:34 [DEBUG] train episode 1530: reward = -11.00, steps = 1782\n",
      "23:33:40 [DEBUG] train episode 1531: reward = -14.00, steps = 1596\n",
      "23:38:18 [DEBUG] train episode 1532: reward = -19.00, steps = 1163\n",
      "23:45:32 [DEBUG] train episode 1533: reward = -11.00, steps = 1821\n",
      "23:53:11 [DEBUG] train episode 1534: reward = -9.00, steps = 1930\n",
      "00:02:20 [DEBUG] train episode 1535: reward = -12.00, steps = 1770\n",
      "00:08:41 [DEBUG] train episode 1536: reward = -10.00, steps = 1835\n",
      "00:14:45 [DEBUG] train episode 1537: reward = -12.00, steps = 1640\n",
      "00:18:43 [DEBUG] train episode 1538: reward = -17.00, steps = 1100\n",
      "00:25:58 [DEBUG] train episode 1539: reward = -10.00, steps = 2060\n",
      "00:31:44 [DEBUG] train episode 1540: reward = -12.00, steps = 1655\n",
      "00:37:21 [DEBUG] train episode 1541: reward = -14.00, steps = 1638\n",
      "00:42:33 [DEBUG] train episode 1542: reward = -14.00, steps = 1513\n",
      "00:51:56 [DEBUG] train episode 1543: reward = 1.00, steps = 2742\n",
      "01:00:29 [DEBUG] train episode 1544: reward = -2.00, steps = 2491\n",
      "01:05:33 [DEBUG] train episode 1545: reward = -16.00, steps = 1480\n",
      "01:12:09 [DEBUG] train episode 1546: reward = -9.00, steps = 1931\n",
      "01:16:58 [DEBUG] train episode 1547: reward = -16.00, steps = 1406\n",
      "01:25:34 [DEBUG] train episode 1548: reward = 1.00, steps = 2510\n",
      "01:29:19 [DEBUG] train episode 1549: reward = -17.00, steps = 1096\n",
      "01:35:36 [DEBUG] train episode 1550: reward = -11.00, steps = 1838\n",
      "01:40:00 [DEBUG] train episode 1551: reward = -16.00, steps = 1293\n",
      "01:46:34 [DEBUG] train episode 1552: reward = -8.00, steps = 1928\n",
      "01:52:01 [DEBUG] train episode 1553: reward = -14.00, steps = 1600\n",
      "01:56:42 [DEBUG] train episode 1554: reward = -15.00, steps = 1374\n",
      "02:00:22 [DEBUG] train episode 1555: reward = -18.00, steps = 1070\n",
      "02:06:33 [DEBUG] train episode 1556: reward = -11.00, steps = 1812\n",
      "02:13:31 [DEBUG] train episode 1557: reward = -9.00, steps = 2050\n",
      "02:22:03 [DEBUG] train episode 1558: reward = 1.00, steps = 2521\n",
      "02:27:20 [DEBUG] train episode 1559: reward = -13.00, steps = 1554\n",
      "02:34:39 [DEBUG] train episode 1560: reward = -7.00, steps = 2153\n",
      "02:40:18 [DEBUG] train episode 1561: reward = -12.00, steps = 1666\n",
      "02:50:00 [DEBUG] train episode 1562: reward = 1.00, steps = 2864\n",
      "02:54:57 [DEBUG] train episode 1563: reward = -15.00, steps = 1468\n",
      "02:59:46 [DEBUG] train episode 1564: reward = -14.00, steps = 1430\n",
      "03:05:55 [DEBUG] train episode 1565: reward = -11.00, steps = 1827\n",
      "03:15:20 [DEBUG] train episode 1566: reward = -2.00, steps = 2780\n",
      "03:21:17 [DEBUG] train episode 1567: reward = -12.00, steps = 1763\n",
      "03:25:19 [DEBUG] train episode 1568: reward = -17.00, steps = 1191\n",
      "03:31:22 [DEBUG] train episode 1569: reward = -12.00, steps = 1783\n",
      "03:38:53 [DEBUG] train episode 1570: reward = -6.00, steps = 2228\n",
      "03:45:32 [DEBUG] train episode 1571: reward = -9.00, steps = 1969\n",
      "03:54:22 [DEBUG] train episode 1572: reward = -5.00, steps = 2621\n",
      "04:00:26 [DEBUG] train episode 1573: reward = -11.00, steps = 1791\n",
      "04:05:48 [DEBUG] train episode 1574: reward = -12.00, steps = 1579\n",
      "04:10:51 [DEBUG] train episode 1575: reward = -14.00, steps = 1494\n",
      "04:14:16 [DEBUG] train episode 1576: reward = -18.00, steps = 1005\n",
      "04:20:51 [DEBUG] train episode 1577: reward = -11.00, steps = 1863\n",
      "04:28:34 [DEBUG] train episode 1578: reward = -8.00, steps = 2261\n",
      "04:34:43 [DEBUG] train episode 1579: reward = -12.00, steps = 1810\n",
      "04:41:55 [DEBUG] train episode 1580: reward = -9.00, steps = 2126\n",
      "04:48:10 [DEBUG] train episode 1581: reward = -13.00, steps = 1833\n",
      "04:54:33 [DEBUG] train episode 1582: reward = -11.00, steps = 1880\n",
      "04:59:28 [DEBUG] train episode 1583: reward = -15.00, steps = 1434\n",
      "05:06:54 [DEBUG] train episode 1584: reward = -7.00, steps = 2172\n",
      "05:13:16 [DEBUG] train episode 1585: reward = -9.00, steps = 1862\n",
      "05:18:53 [DEBUG] train episode 1586: reward = -13.00, steps = 1644\n",
      "05:25:33 [DEBUG] train episode 1587: reward = -11.00, steps = 1947\n",
      "05:31:04 [DEBUG] train episode 1588: reward = -13.00, steps = 1609\n",
      "05:37:06 [DEBUG] train episode 1589: reward = -13.00, steps = 1769\n",
      "05:44:19 [DEBUG] train episode 1590: reward = -8.00, steps = 2123\n",
      "05:53:13 [DEBUG] train episode 1591: reward = -2.00, steps = 2619\n",
      "06:00:46 [DEBUG] train episode 1592: reward = -6.00, steps = 2213\n",
      "06:06:19 [DEBUG] train episode 1593: reward = -12.00, steps = 1631\n",
      "06:12:52 [DEBUG] train episode 1594: reward = -10.00, steps = 1937\n",
      "06:17:23 [DEBUG] train episode 1595: reward = -16.00, steps = 1335\n",
      "06:24:25 [DEBUG] train episode 1596: reward = -7.00, steps = 2069\n",
      "06:31:41 [DEBUG] train episode 1597: reward = -7.00, steps = 2147\n",
      "06:36:32 [DEBUG] train episode 1598: reward = -16.00, steps = 1435\n",
      "06:43:22 [DEBUG] train episode 1599: reward = -9.00, steps = 2017\n",
      "06:49:23 [DEBUG] train episode 1600: reward = -11.00, steps = 1771\n",
      "06:54:11 [DEBUG] train episode 1601: reward = -14.00, steps = 1411\n",
      "06:59:18 [DEBUG] train episode 1602: reward = -14.00, steps = 1505\n",
      "07:03:58 [DEBUG] train episode 1603: reward = -17.00, steps = 1372\n",
      "07:10:20 [DEBUG] train episode 1604: reward = -10.00, steps = 1880\n",
      "07:16:17 [DEBUG] train episode 1605: reward = -12.00, steps = 1753\n",
      "07:20:55 [DEBUG] train episode 1606: reward = -16.00, steps = 1364\n",
      "07:28:16 [DEBUG] train episode 1607: reward = -7.00, steps = 2170\n",
      "07:34:22 [DEBUG] train episode 1608: reward = -14.00, steps = 1799\n",
      "07:40:06 [DEBUG] train episode 1609: reward = -12.00, steps = 1688\n",
      "07:44:54 [DEBUG] train episode 1610: reward = -14.00, steps = 1417\n",
      "07:52:40 [DEBUG] train episode 1611: reward = -5.00, steps = 2291\n",
      "07:58:55 [DEBUG] train episode 1612: reward = -11.00, steps = 1844\n",
      "08:05:00 [DEBUG] train episode 1613: reward = -12.00, steps = 1788\n",
      "08:10:40 [DEBUG] train episode 1614: reward = -12.00, steps = 1668\n",
      "08:16:37 [DEBUG] train episode 1615: reward = -12.00, steps = 1749\n",
      "08:24:29 [DEBUG] train episode 1616: reward = -7.00, steps = 2203\n",
      "08:28:27 [DEBUG] train episode 1617: reward = -17.00, steps = 1179\n",
      "08:33:57 [DEBUG] train episode 1618: reward = -12.00, steps = 1638\n",
      "08:39:45 [DEBUG] train episode 1619: reward = -13.00, steps = 1721\n",
      "08:45:51 [DEBUG] train episode 1620: reward = -11.00, steps = 1813\n",
      "08:53:33 [DEBUG] train episode 1621: reward = -6.00, steps = 2280\n",
      "09:00:20 [DEBUG] train episode 1622: reward = -10.00, steps = 1985\n",
      "09:07:10 [DEBUG] train episode 1623: reward = -9.00, steps = 2004\n",
      "09:15:18 [DEBUG] train episode 1624: reward = -7.00, steps = 2405\n",
      "09:19:00 [DEBUG] train episode 1625: reward = -18.00, steps = 1087\n",
      "09:24:37 [DEBUG] train episode 1626: reward = -14.00, steps = 1648\n",
      "09:29:30 [DEBUG] train episode 1627: reward = -15.00, steps = 1439\n",
      "09:33:50 [DEBUG] train episode 1628: reward = -16.00, steps = 1288\n",
      "09:40:09 [DEBUG] train episode 1629: reward = -10.00, steps = 1882\n",
      "09:47:34 [DEBUG] train episode 1630: reward = -9.00, steps = 2195\n",
      "09:52:51 [DEBUG] train episode 1631: reward = -13.00, steps = 1567\n",
      "09:59:43 [DEBUG] train episode 1632: reward = -8.00, steps = 2037\n",
      "10:05:50 [DEBUG] train episode 1633: reward = -11.00, steps = 1809\n",
      "10:12:40 [DEBUG] train episode 1634: reward = -8.00, steps = 2027\n",
      "10:21:04 [DEBUG] train episode 1635: reward = 1.00, steps = 2489\n",
      "10:28:14 [DEBUG] train episode 1636: reward = -9.00, steps = 2126\n",
      "10:36:02 [DEBUG] train episode 1637: reward = -7.00, steps = 2307\n",
      "10:44:19 [DEBUG] train episode 1638: reward = -6.00, steps = 2456\n",
      "10:52:14 [DEBUG] train episode 1639: reward = -8.00, steps = 2335\n",
      "11:01:35 [DEBUG] train episode 1640: reward = 1.00, steps = 2772\n",
      "11:07:12 [DEBUG] train episode 1641: reward = -12.00, steps = 1658\n",
      "11:13:18 [DEBUG] train episode 1642: reward = -12.00, steps = 1802\n",
      "11:19:47 [DEBUG] train episode 1643: reward = -11.00, steps = 1923\n",
      "11:27:18 [DEBUG] train episode 1644: reward = -8.00, steps = 2222\n",
      "11:35:54 [DEBUG] train episode 1645: reward = -3.00, steps = 2551\n",
      "11:43:03 [DEBUG] train episode 1646: reward = -9.00, steps = 2123\n",
      "11:48:30 [DEBUG] train episode 1647: reward = -14.00, steps = 1608\n",
      "11:55:17 [DEBUG] train episode 1648: reward = -10.00, steps = 1997\n",
      "12:03:08 [DEBUG] train episode 1649: reward = -4.00, steps = 2313\n",
      "12:09:24 [DEBUG] train episode 1650: reward = -11.00, steps = 1849\n",
      "12:14:45 [DEBUG] train episode 1651: reward = -14.00, steps = 1574\n",
      "12:22:57 [DEBUG] train episode 1652: reward = -4.00, steps = 2418\n",
      "12:32:26 [DEBUG] train episode 1653: reward = -1.00, steps = 2783\n",
      "12:37:36 [DEBUG] train episode 1654: reward = -16.00, steps = 1527\n",
      "12:45:05 [DEBUG] train episode 1655: reward = -9.00, steps = 2212\n",
      "12:52:04 [DEBUG] train episode 1656: reward = -9.00, steps = 2067\n",
      "12:56:51 [DEBUG] train episode 1657: reward = -15.00, steps = 1399\n",
      "13:04:28 [DEBUG] train episode 1658: reward = -8.00, steps = 2217\n",
      "13:14:30 [DEBUG] train episode 1659: reward = 1.00, steps = 2627\n",
      "13:21:30 [DEBUG] train episode 1660: reward = -11.00, steps = 1755\n",
      "13:32:12 [DEBUG] train episode 1661: reward = 1.00, steps = 2715\n",
      "13:38:43 [DEBUG] train episode 1662: reward = -10.00, steps = 1853\n",
      "13:43:24 [DEBUG] train episode 1663: reward = -15.00, steps = 1346\n",
      "13:50:39 [DEBUG] train episode 1664: reward = -8.00, steps = 2081\n",
      "13:57:08 [DEBUG] train episode 1665: reward = -11.00, steps = 1871\n",
      "14:02:14 [DEBUG] train episode 1666: reward = -14.00, steps = 1477\n",
      "14:09:22 [DEBUG] train episode 1667: reward = -8.00, steps = 2057\n",
      "14:17:35 [DEBUG] train episode 1668: reward = -2.00, steps = 2380\n",
      "14:24:31 [DEBUG] train episode 1669: reward = -8.00, steps = 2003\n",
      "14:31:51 [DEBUG] train episode 1670: reward = 9.00, steps = 2130\n",
      "14:40:24 [DEBUG] train episode 1671: reward = 1.00, steps = 2499\n",
      "14:47:00 [DEBUG] train episode 1672: reward = -8.00, steps = 1921\n",
      "14:55:12 [DEBUG] train episode 1673: reward = -4.00, steps = 2394\n",
      "15:00:02 [DEBUG] train episode 1674: reward = -16.00, steps = 1406\n",
      "15:06:28 [DEBUG] train episode 1675: reward = -10.00, steps = 1877\n",
      "15:13:19 [DEBUG] train episode 1676: reward = 12.00, steps = 2000\n",
      "15:21:49 [DEBUG] train episode 1677: reward = -3.00, steps = 2466\n",
      "15:28:40 [DEBUG] train episode 1678: reward = -8.00, steps = 1985\n",
      "15:33:21 [DEBUG] train episode 1679: reward = -15.00, steps = 1352\n",
      "15:40:02 [DEBUG] train episode 1680: reward = -9.00, steps = 1944\n",
      "15:46:08 [DEBUG] train episode 1681: reward = -12.00, steps = 1768\n",
      "15:52:03 [DEBUG] train episode 1682: reward = -12.00, steps = 1724\n",
      "15:59:08 [DEBUG] train episode 1683: reward = -8.00, steps = 2063\n",
      "16:04:18 [DEBUG] train episode 1684: reward = -13.00, steps = 1501\n",
      "16:12:04 [DEBUG] train episode 1685: reward = -6.00, steps = 2264\n",
      "16:18:52 [DEBUG] train episode 1686: reward = -11.00, steps = 1986\n",
      "16:25:39 [DEBUG] train episode 1687: reward = 15.00, steps = 1972\n",
      "16:33:59 [DEBUG] train episode 1688: reward = -1.00, steps = 2432\n",
      "16:42:13 [DEBUG] train episode 1689: reward = -7.00, steps = 2409\n",
      "16:48:05 [DEBUG] train episode 1690: reward = -12.00, steps = 1708\n",
      "16:54:52 [DEBUG] train episode 1691: reward = -10.00, steps = 1981\n",
      "17:03:35 [DEBUG] train episode 1692: reward = 1.00, steps = 2537\n",
      "17:07:57 [DEBUG] train episode 1693: reward = -18.00, steps = 1269\n",
      "17:14:06 [DEBUG] train episode 1694: reward = -11.00, steps = 1792\n",
      "17:22:53 [DEBUG] train episode 1695: reward = -1.00, steps = 2568\n",
      "17:30:06 [DEBUG] train episode 1696: reward = -8.00, steps = 2101\n",
      "17:38:05 [DEBUG] train episode 1697: reward = 9.00, steps = 2336\n",
      "17:46:57 [DEBUG] train episode 1698: reward = -3.00, steps = 2584\n",
      "17:52:02 [DEBUG] train episode 1699: reward = -15.00, steps = 1482\n",
      "17:57:19 [DEBUG] train episode 1700: reward = -13.00, steps = 1545\n",
      "18:03:23 [DEBUG] train episode 1701: reward = -11.00, steps = 1761\n",
      "18:08:27 [DEBUG] train episode 1702: reward = -14.00, steps = 1478\n",
      "18:17:00 [DEBUG] train episode 1703: reward = 3.00, steps = 2499\n",
      "18:24:09 [DEBUG] train episode 1704: reward = 9.00, steps = 2091\n",
      "18:29:50 [DEBUG] train episode 1705: reward = -12.00, steps = 1656\n",
      "18:37:01 [DEBUG] train episode 1706: reward = -8.00, steps = 2111\n",
      "18:42:42 [DEBUG] train episode 1707: reward = -11.00, steps = 1659\n",
      "18:51:47 [DEBUG] train episode 1708: reward = -1.00, steps = 2656\n",
      "18:59:18 [DEBUG] train episode 1709: reward = 6.00, steps = 2198\n",
      "19:05:50 [DEBUG] train episode 1710: reward = -11.00, steps = 1899\n",
      "19:13:27 [DEBUG] train episode 1711: reward = -7.00, steps = 2189\n",
      "19:20:14 [DEBUG] train episode 1712: reward = 14.00, steps = 1969\n",
      "19:26:48 [DEBUG] train episode 1713: reward = -11.00, steps = 1887\n",
      "19:32:54 [DEBUG] train episode 1714: reward = -12.00, steps = 1757\n",
      "19:38:47 [DEBUG] train episode 1715: reward = -12.00, steps = 1694\n",
      "19:45:22 [DEBUG] train episode 1716: reward = -9.00, steps = 1913\n",
      "19:51:57 [DEBUG] train episode 1717: reward = -8.00, steps = 1912\n",
      "19:59:19 [DEBUG] train episode 1718: reward = 10.00, steps = 2132\n",
      "20:05:46 [DEBUG] train episode 1719: reward = -8.00, steps = 1879\n",
      "20:13:19 [DEBUG] train episode 1720: reward = 8.00, steps = 2212\n",
      "20:17:53 [DEBUG] train episode 1721: reward = -16.00, steps = 1332\n",
      "20:26:40 [DEBUG] train episode 1722: reward = 1.00, steps = 2553\n",
      "20:31:35 [DEBUG] train episode 1723: reward = -16.00, steps = 1426\n",
      "20:36:48 [DEBUG] train episode 1724: reward = -16.00, steps = 1520\n",
      "20:40:37 [DEBUG] train episode 1725: reward = -20.00, steps = 1110\n",
      "20:48:39 [DEBUG] train episode 1726: reward = 7.00, steps = 2342\n",
      "20:55:30 [DEBUG] train episode 1727: reward = -8.00, steps = 1991\n",
      "21:04:34 [DEBUG] train episode 1728: reward = -2.00, steps = 2648\n",
      "21:13:05 [DEBUG] train episode 1729: reward = -6.00, steps = 2494\n",
      "21:20:37 [DEBUG] train episode 1730: reward = -6.00, steps = 2202\n",
      "21:26:20 [DEBUG] train episode 1731: reward = -13.00, steps = 1656\n",
      "21:32:39 [DEBUG] train episode 1732: reward = -11.00, steps = 1828\n",
      "21:39:53 [DEBUG] train episode 1733: reward = -10.00, steps = 2071\n",
      "21:43:07 [DEBUG] train episode 1734: reward = -20.00, steps = 835\n",
      "21:51:49 [DEBUG] train episode 1735: reward = -7.00, steps = 2268\n",
      "22:00:56 [DEBUG] train episode 1736: reward = -9.00, steps = 2322\n",
      "22:06:03 [DEBUG] train episode 1737: reward = -16.00, steps = 1307\n",
      "22:12:37 [DEBUG] train episode 1738: reward = -11.00, steps = 1687\n",
      "22:19:31 [DEBUG] train episode 1739: reward = -11.00, steps = 1914\n",
      "22:29:12 [DEBUG] train episode 1740: reward = -1.00, steps = 2680\n",
      "22:37:42 [DEBUG] train episode 1741: reward = 9.00, steps = 2276\n",
      "22:46:13 [DEBUG] train episode 1742: reward = 8.00, steps = 2189\n",
      "22:54:47 [DEBUG] train episode 1743: reward = 13.00, steps = 2259\n",
      "23:01:09 [DEBUG] train episode 1744: reward = -12.00, steps = 1625\n",
      "23:10:05 [DEBUG] train episode 1745: reward = -5.00, steps = 2296\n",
      "23:16:31 [DEBUG] train episode 1746: reward = -12.00, steps = 1672\n",
      "23:25:45 [DEBUG] train episode 1747: reward = 10.00, steps = 2373\n",
      "23:34:26 [DEBUG] train episode 1748: reward = -5.00, steps = 2307\n",
      "23:43:44 [DEBUG] train episode 1749: reward = -1.00, steps = 2472\n",
      "23:54:05 [DEBUG] train episode 1750: reward = -2.00, steps = 2756\n",
      "00:03:50 [DEBUG] train episode 1751: reward = -1.00, steps = 2683\n",
      "00:14:25 [DEBUG] train episode 1752: reward = 9.00, steps = 2176\n",
      "00:22:46 [DEBUG] train episode 1753: reward = -12.00, steps = 1815\n",
      "00:30:46 [DEBUG] train episode 1754: reward = -10.00, steps = 1732\n",
      "00:42:35 [DEBUG] train episode 1755: reward = -2.00, steps = 2584\n",
      "00:53:29 [DEBUG] train episode 1756: reward = 1.00, steps = 2411\n",
      "01:02:54 [DEBUG] train episode 1757: reward = 11.00, steps = 2065\n",
      "01:08:19 [DEBUG] train episode 1758: reward = -16.00, steps = 1205\n",
      "01:19:27 [DEBUG] train episode 1759: reward = 9.00, steps = 2415\n",
      "01:28:32 [DEBUG] train episode 1760: reward = 17.00, steps = 1900\n",
      "01:41:30 [DEBUG] train episode 1761: reward = -1.00, steps = 2744\n",
      "01:53:42 [DEBUG] train episode 1762: reward = 4.00, steps = 2547\n",
      "02:03:48 [DEBUG] train episode 1763: reward = -9.00, steps = 2139\n",
      "02:14:39 [DEBUG] train episode 1764: reward = -3.00, steps = 2317\n",
      "02:25:35 [DEBUG] train episode 1765: reward = -5.00, steps = 2313\n",
      "02:37:36 [DEBUG] train episode 1766: reward = -5.00, steps = 2541\n",
      "02:49:52 [DEBUG] train episode 1767: reward = -5.00, steps = 2605\n",
      "02:59:29 [DEBUG] train episode 1768: reward = -9.00, steps = 2032\n",
      "03:08:55 [DEBUG] train episode 1769: reward = 13.00, steps = 2016\n",
      "03:20:17 [DEBUG] train episode 1770: reward = 8.00, steps = 2427\n",
      "03:25:18 [DEBUG] train episode 1771: reward = -19.00, steps = 1053\n",
      "03:34:07 [DEBUG] train episode 1772: reward = -10.00, steps = 1874\n",
      "03:42:43 [DEBUG] train episode 1773: reward = -11.00, steps = 1820\n",
      "03:54:54 [DEBUG] train episode 1774: reward = -1.00, steps = 2601\n",
      "04:07:11 [DEBUG] train episode 1775: reward = 1.00, steps = 2600\n",
      "04:16:46 [DEBUG] train episode 1776: reward = -8.00, steps = 2019\n",
      "04:27:23 [DEBUG] train episode 1777: reward = 9.00, steps = 2252\n",
      "04:36:31 [DEBUG] train episode 1778: reward = -10.00, steps = 1871\n",
      "04:47:32 [DEBUG] train episode 1779: reward = 8.00, steps = 2289\n",
      "04:58:25 [DEBUG] train episode 1780: reward = -3.00, steps = 2291\n",
      "05:07:50 [DEBUG] train episode 1781: reward = -9.00, steps = 1973\n",
      "05:20:49 [DEBUG] train episode 1782: reward = -1.00, steps = 2721\n",
      "05:32:03 [DEBUG] train episode 1783: reward = 8.00, steps = 2219\n",
      "05:46:06 [DEBUG] train episode 1784: reward = 8.00, steps = 2479\n",
      "05:57:43 [DEBUG] train episode 1785: reward = 9.00, steps = 2101\n",
      "06:11:15 [DEBUG] train episode 1786: reward = -1.00, steps = 2622\n",
      "06:21:01 [DEBUG] train episode 1787: reward = -9.00, steps = 1984\n",
      "06:30:44 [DEBUG] train episode 1788: reward = -8.00, steps = 2010\n",
      "06:40:56 [DEBUG] train episode 1789: reward = -7.00, steps = 2126\n",
      "06:53:27 [DEBUG] train episode 1790: reward = 1.00, steps = 2617\n",
      "07:07:50 [DEBUG] train episode 1791: reward = 1.00, steps = 2984\n",
      "07:17:46 [DEBUG] train episode 1792: reward = 13.00, steps = 2032\n",
      "07:28:37 [DEBUG] train episode 1793: reward = -7.00, steps = 2233\n",
      "07:39:22 [DEBUG] train episode 1794: reward = 8.00, steps = 2241\n",
      "07:51:11 [DEBUG] train episode 1795: reward = 9.00, steps = 2458\n",
      "08:00:34 [DEBUG] train episode 1796: reward = 15.00, steps = 1930\n",
      "08:12:17 [DEBUG] train episode 1797: reward = -8.00, steps = 2433\n",
      "08:21:30 [DEBUG] train episode 1798: reward = 14.00, steps = 1897\n",
      "08:30:44 [DEBUG] train episode 1799: reward = 15.00, steps = 1897\n",
      "08:43:17 [DEBUG] train episode 1800: reward = -5.00, steps = 2610\n",
      "08:55:37 [DEBUG] train episode 1801: reward = -6.00, steps = 2530\n",
      "09:05:36 [DEBUG] train episode 1802: reward = 14.00, steps = 1895\n",
      "09:16:23 [DEBUG] train episode 1803: reward = -7.00, steps = 2055\n",
      "09:29:18 [DEBUG] train episode 1804: reward = 1.00, steps = 2456\n",
      "09:39:42 [DEBUG] train episode 1805: reward = 13.00, steps = 1948\n",
      "09:52:25 [DEBUG] train episode 1806: reward = -5.00, steps = 2461\n",
      "10:07:18 [DEBUG] train episode 1807: reward = 1.00, steps = 2921\n",
      "10:19:00 [DEBUG] train episode 1808: reward = 14.00, steps = 2323\n",
      "10:31:05 [DEBUG] train episode 1809: reward = 1.00, steps = 2402\n",
      "10:40:03 [DEBUG] train episode 1810: reward = -11.00, steps = 1761\n",
      "10:51:37 [DEBUG] train episode 1811: reward = 8.00, steps = 2263\n",
      "11:00:39 [DEBUG] train episode 1812: reward = -11.00, steps = 1769\n",
      "11:14:35 [DEBUG] train episode 1813: reward = -1.00, steps = 2767\n",
      "11:25:02 [DEBUG] train episode 1814: reward = 14.00, steps = 1986\n",
      "11:35:11 [DEBUG] train episode 1815: reward = 13.00, steps = 1992\n",
      "11:46:12 [DEBUG] train episode 1816: reward = 9.00, steps = 2208\n",
      "11:56:18 [DEBUG] train episode 1817: reward = 12.00, steps = 2030\n",
      "12:07:23 [DEBUG] train episode 1818: reward = -6.00, steps = 2219\n",
      "12:17:54 [DEBUG] train episode 1819: reward = -10.00, steps = 2114\n",
      "12:32:03 [DEBUG] train episode 1820: reward = 1.00, steps = 2794\n",
      "12:43:03 [DEBUG] train episode 1821: reward = -8.00, steps = 2208\n",
      "12:51:57 [DEBUG] train episode 1822: reward = 17.00, steps = 1780\n",
      "13:03:21 [DEBUG] train episode 1823: reward = -6.00, steps = 2277\n",
      "13:13:07 [DEBUG] train episode 1824: reward = 14.00, steps = 1947\n",
      "13:22:37 [DEBUG] train episode 1825: reward = 15.00, steps = 1878\n",
      "13:33:28 [DEBUG] train episode 1826: reward = 14.00, steps = 2114\n",
      "13:41:49 [DEBUG] train episode 1827: reward = 18.00, steps = 1658\n",
      "13:55:47 [DEBUG] train episode 1828: reward = -3.00, steps = 2798\n",
      "14:07:07 [DEBUG] train episode 1829: reward = -6.00, steps = 2263\n",
      "14:19:10 [DEBUG] train episode 1830: reward = -1.00, steps = 2396\n",
      "14:27:50 [DEBUG] train episode 1831: reward = -12.00, steps = 1719\n",
      "14:39:35 [DEBUG] train episode 1832: reward = -9.00, steps = 2320\n",
      "14:49:22 [DEBUG] train episode 1833: reward = 15.00, steps = 1961\n",
      "15:01:24 [DEBUG] train episode 1834: reward = -1.00, steps = 2397\n",
      "15:10:37 [DEBUG] train episode 1835: reward = 16.00, steps = 1862\n",
      "15:22:03 [DEBUG] train episode 1836: reward = 8.00, steps = 2289\n",
      "15:32:20 [DEBUG] train episode 1837: reward = 11.00, steps = 2051\n",
      "15:45:01 [DEBUG] train episode 1838: reward = -5.00, steps = 2531\n",
      "15:57:25 [DEBUG] train episode 1839: reward = -5.00, steps = 2439\n",
      "16:08:51 [DEBUG] train episode 1840: reward = 6.00, steps = 2289\n",
      "16:20:45 [DEBUG] train episode 1841: reward = -6.00, steps = 2216\n",
      "16:35:42 [DEBUG] train episode 1842: reward = 9.00, steps = 2496\n",
      "16:50:21 [DEBUG] train episode 1843: reward = -3.00, steps = 2631\n",
      "17:05:08 [DEBUG] train episode 1844: reward = -2.00, steps = 2704\n",
      "17:20:47 [DEBUG] train episode 1845: reward = -1.00, steps = 2889\n",
      "17:31:46 [DEBUG] train episode 1846: reward = 13.00, steps = 1989\n",
      "17:39:56 [DEBUG] train episode 1847: reward = -14.00, steps = 1490\n",
      "17:50:47 [DEBUG] train episode 1848: reward = 13.00, steps = 1969\n",
      "18:01:46 [DEBUG] train episode 1849: reward = 14.00, steps = 1957\n",
      "18:15:54 [DEBUG] train episode 1850: reward = -1.00, steps = 2495\n",
      "18:28:21 [DEBUG] train episode 1851: reward = -6.00, steps = 2222\n",
      "18:42:38 [DEBUG] train episode 1852: reward = -2.00, steps = 2564\n",
      "18:53:00 [DEBUG] train episode 1853: reward = 15.00, steps = 1896\n",
      "19:08:11 [DEBUG] train episode 1854: reward = -1.00, steps = 2889\n",
      "19:16:12 [DEBUG] train episode 1855: reward = -13.00, steps = 1516\n",
      "19:27:52 [DEBUG] train episode 1856: reward = 9.00, steps = 2203\n",
      "19:42:30 [DEBUG] train episode 1857: reward = -1.00, steps = 2719\n",
      "19:56:34 [DEBUG] train episode 1858: reward = -1.00, steps = 2669\n",
      "20:08:02 [DEBUG] train episode 1859: reward = 10.00, steps = 2185\n",
      "20:19:08 [DEBUG] train episode 1860: reward = 13.00, steps = 2106\n",
      "20:33:18 [DEBUG] train episode 1861: reward = -1.00, steps = 2643\n",
      "20:43:10 [DEBUG] train episode 1862: reward = 14.00, steps = 1890\n",
      "20:55:03 [DEBUG] train episode 1863: reward = -6.00, steps = 2274\n",
      "21:06:20 [DEBUG] train episode 1864: reward = 11.00, steps = 2153\n",
      "21:17:25 [DEBUG] train episode 1865: reward = 13.00, steps = 2046\n",
      "21:31:21 [DEBUG] train episode 1866: reward = -6.00, steps = 2271\n",
      "21:46:48 [DEBUG] train episode 1867: reward = 12.00, steps = 2222\n",
      "21:53:11 [DEBUG] train episode 1868: reward = -19.00, steps = 1010\n",
      "22:06:53 [DEBUG] train episode 1869: reward = 15.00, steps = 2106\n",
      "22:21:01 [DEBUG] train episode 1870: reward = 10.00, steps = 2271\n",
      "22:31:59 [DEBUG] train episode 1871: reward = 15.00, steps = 1834\n",
      "22:48:58 [DEBUG] train episode 1872: reward = -1.00, steps = 2868\n",
      "22:57:07 [DEBUG] train episode 1873: reward = -17.00, steps = 1313\n",
      "23:11:51 [DEBUG] train episode 1874: reward = 10.00, steps = 2163\n",
      "23:23:37 [DEBUG] train episode 1875: reward = -5.00, steps = 2098\n",
      "23:39:48 [DEBUG] train episode 1876: reward = -3.00, steps = 2608\n",
      "23:50:33 [DEBUG] train episode 1877: reward = 16.00, steps = 1906\n",
      "00:04:33 [DEBUG] train episode 1878: reward = -8.00, steps = 2614\n",
      "00:10:35 [DEBUG] train episode 1879: reward = -19.00, steps = 1095\n",
      "00:27:09 [DEBUG] train episode 1880: reward = -2.00, steps = 2689\n",
      "00:34:57 [DEBUG] train episode 1881: reward = -14.00, steps = 1579\n",
      "00:42:27 [DEBUG] train episode 1882: reward = -14.00, steps = 1492\n",
      "00:54:53 [DEBUG] train episode 1883: reward = -4.00, steps = 2523\n",
      "01:08:34 [DEBUG] train episode 1884: reward = -1.00, steps = 2785\n",
      "01:19:24 [DEBUG] train episode 1885: reward = -6.00, steps = 2213\n",
      "01:31:50 [DEBUG] train episode 1886: reward = -1.00, steps = 2517\n",
      "01:45:32 [DEBUG] train episode 1887: reward = -1.00, steps = 2826\n",
      "01:54:30 [DEBUG] train episode 1888: reward = 15.00, steps = 1827\n",
      "02:07:42 [DEBUG] train episode 1889: reward = 1.00, steps = 2721\n",
      "02:16:49 [DEBUG] train episode 1890: reward = -11.00, steps = 1873\n",
      "02:22:51 [DEBUG] train episode 1891: reward = -17.00, steps = 1227\n",
      "02:31:09 [DEBUG] train episode 1892: reward = -12.00, steps = 1676\n",
      "02:45:09 [DEBUG] train episode 1893: reward = -2.00, steps = 2865\n",
      "02:57:55 [DEBUG] train episode 1894: reward = -1.00, steps = 2615\n",
      "03:02:40 [DEBUG] train episode 1895: reward = -19.00, steps = 954\n",
      "03:14:47 [DEBUG] train episode 1896: reward = 1.00, steps = 2512\n",
      "03:19:52 [DEBUG] train episode 1897: reward = -18.00, steps = 1028\n",
      "03:25:49 [DEBUG] train episode 1898: reward = -19.00, steps = 1189\n",
      "03:37:48 [DEBUG] train episode 1899: reward = -1.00, steps = 2451\n",
      "03:47:10 [DEBUG] train episode 1900: reward = -10.00, steps = 1920\n",
      "03:56:58 [DEBUG] train episode 1901: reward = -8.00, steps = 2068\n",
      "04:07:32 [DEBUG] train episode 1902: reward = 10.00, steps = 2164\n",
      "04:13:56 [DEBUG] train episode 1903: reward = -16.00, steps = 1313\n",
      "04:21:52 [DEBUG] train episode 1904: reward = -14.00, steps = 1626\n",
      "04:30:09 [DEBUG] train episode 1905: reward = -13.00, steps = 1691\n",
      "04:35:18 [DEBUG] train episode 1906: reward = -18.00, steps = 1061\n",
      "04:44:49 [DEBUG] train episode 1907: reward = -6.00, steps = 1943\n",
      "04:55:30 [DEBUG] train episode 1908: reward = -10.00, steps = 1964\n",
      "05:05:48 [DEBUG] train episode 1909: reward = 12.00, steps = 2108\n",
      "05:14:35 [DEBUG] train episode 1910: reward = -12.00, steps = 1781\n",
      "05:23:01 [DEBUG] train episode 1911: reward = -13.00, steps = 1721\n",
      "05:31:06 [DEBUG] train episode 1912: reward = -12.00, steps = 1652\n",
      "05:42:50 [DEBUG] train episode 1913: reward = 1.00, steps = 2416\n",
      "05:52:49 [DEBUG] train episode 1914: reward = -8.00, steps = 2061\n",
      "06:03:34 [DEBUG] train episode 1915: reward = -8.00, steps = 2167\n",
      "06:13:33 [DEBUG] train episode 1916: reward = -8.00, steps = 2040\n",
      "06:20:14 [DEBUG] train episode 1917: reward = -14.00, steps = 1380\n",
      "06:31:25 [DEBUG] train episode 1918: reward = -5.00, steps = 2302\n",
      "06:42:03 [DEBUG] train episode 1919: reward = -6.00, steps = 2215\n",
      "06:51:19 [DEBUG] train episode 1920: reward = -8.00, steps = 1909\n",
      "07:01:18 [DEBUG] train episode 1921: reward = 12.00, steps = 2050\n",
      "07:12:43 [DEBUG] train episode 1922: reward = -4.00, steps = 2377\n",
      "07:23:27 [DEBUG] train episode 1923: reward = -4.00, steps = 2215\n",
      "07:30:59 [DEBUG] train episode 1924: reward = -14.00, steps = 1440\n",
      "07:39:40 [DEBUG] train episode 1925: reward = -11.00, steps = 1778\n",
      "07:51:30 [DEBUG] train episode 1926: reward = 1.00, steps = 2472\n",
      "08:03:32 [DEBUG] train episode 1927: reward = -1.00, steps = 2455\n",
      "08:15:03 [DEBUG] train episode 1928: reward = 3.00, steps = 2393\n",
      "08:26:45 [DEBUG] train episode 1929: reward = 7.00, steps = 2388\n",
      "08:36:25 [DEBUG] train episode 1930: reward = -10.00, steps = 2001\n",
      "08:47:03 [DEBUG] train episode 1931: reward = 8.00, steps = 2193\n",
      "08:56:26 [DEBUG] train episode 1932: reward = -10.00, steps = 1916\n",
      "09:05:56 [DEBUG] train episode 1933: reward = 12.00, steps = 1957\n",
      "09:13:38 [DEBUG] train episode 1934: reward = -13.00, steps = 1578\n",
      "09:27:07 [DEBUG] train episode 1935: reward = -1.00, steps = 2746\n",
      "09:38:57 [DEBUG] train episode 1936: reward = -2.00, steps = 2432\n",
      "09:53:30 [DEBUG] train episode 1937: reward = -2.00, steps = 3001\n",
      "10:03:33 [DEBUG] train episode 1938: reward = 11.00, steps = 2092\n",
      "10:12:25 [DEBUG] train episode 1939: reward = -10.00, steps = 1865\n",
      "10:27:18 [DEBUG] train episode 1940: reward = 11.00, steps = 2378\n",
      "10:42:37 [DEBUG] train episode 1941: reward = -2.00, steps = 2602\n",
      "10:52:23 [DEBUG] train episode 1942: reward = -12.00, steps = 1827\n",
      "11:01:36 [DEBUG] train episode 1943: reward = -12.00, steps = 1786\n",
      "11:13:38 [DEBUG] train episode 1944: reward = -2.00, steps = 2310\n",
      "11:25:34 [DEBUG] train episode 1945: reward = -5.00, steps = 2306\n",
      "11:40:28 [DEBUG] train episode 1946: reward = -2.00, steps = 2899\n",
      "11:54:11 [DEBUG] train episode 1947: reward = -5.00, steps = 2660\n",
      "12:01:47 [DEBUG] train episode 1948: reward = -14.00, steps = 1465\n",
      "12:13:05 [DEBUG] train episode 1949: reward = -7.00, steps = 2157\n",
      "12:25:21 [DEBUG] train episode 1950: reward = -7.00, steps = 2368\n",
      "12:37:34 [DEBUG] train episode 1951: reward = -5.00, steps = 2434\n",
      "12:51:08 [DEBUG] train episode 1952: reward = 5.00, steps = 2636\n",
      "13:03:57 [DEBUG] train episode 1953: reward = -2.00, steps = 2542\n",
      "13:15:13 [DEBUG] train episode 1954: reward = -6.00, steps = 2253\n",
      "13:23:09 [DEBUG] train episode 1955: reward = -14.00, steps = 1427\n",
      "13:35:56 [DEBUG] train episode 1956: reward = 6.00, steps = 2250\n",
      "13:47:53 [DEBUG] train episode 1957: reward = 12.00, steps = 2184\n",
      "13:57:23 [DEBUG] train episode 1958: reward = -11.00, steps = 1740\n",
      "14:09:50 [DEBUG] train episode 1959: reward = -5.00, steps = 2311\n",
      "14:19:26 [DEBUG] train episode 1960: reward = 16.00, steps = 1780\n",
      "14:26:33 [DEBUG] train episode 1961: reward = -19.00, steps = 1062\n",
      "14:38:07 [DEBUG] train episode 1962: reward = 12.00, steps = 2096\n",
      "14:46:04 [DEBUG] train episode 1963: reward = -13.00, steps = 1584\n",
      "14:57:55 [DEBUG] train episode 1964: reward = 8.00, steps = 2264\n",
      "15:09:22 [DEBUG] train episode 1965: reward = -6.00, steps = 2237\n",
      "15:22:06 [DEBUG] train episode 1966: reward = 6.00, steps = 2486\n",
      "15:37:15 [DEBUG] train episode 1967: reward = -4.00, steps = 2871\n",
      "15:47:22 [DEBUG] train episode 1968: reward = 15.00, steps = 1957\n",
      "15:57:45 [DEBUG] train episode 1969: reward = -10.00, steps = 1958\n",
      "16:12:29 [DEBUG] train episode 1970: reward = -1.00, steps = 2805\n",
      "16:23:56 [DEBUG] train episode 1971: reward = -7.00, steps = 2259\n",
      "16:37:43 [DEBUG] train episode 1972: reward = 1.00, steps = 2701\n",
      "16:48:41 [DEBUG] train episode 1973: reward = -6.00, steps = 2154\n",
      "16:58:39 [DEBUG] train episode 1974: reward = -11.00, steps = 1979\n",
      "17:11:13 [DEBUG] train episode 1975: reward = -2.00, steps = 2505\n",
      "17:25:21 [DEBUG] train episode 1976: reward = -2.00, steps = 2778\n",
      "17:40:06 [DEBUG] train episode 1977: reward = -5.00, steps = 2672\n",
      "17:55:26 [DEBUG] train episode 1978: reward = 1.00, steps = 2637\n",
      "18:03:50 [DEBUG] train episode 1979: reward = -14.00, steps = 1559\n",
      "18:19:37 [DEBUG] train episode 1980: reward = 5.00, steps = 2688\n",
      "18:31:27 [DEBUG] train episode 1981: reward = 8.00, steps = 2214\n",
      "18:44:53 [DEBUG] train episode 1982: reward = -3.00, steps = 2480\n",
      "18:56:36 [DEBUG] train episode 1983: reward = 9.00, steps = 2380\n",
      "19:09:01 [DEBUG] train episode 1984: reward = -6.00, steps = 2500\n",
      "19:21:45 [DEBUG] train episode 1985: reward = -3.00, steps = 2523\n",
      "19:30:06 [DEBUG] train episode 1986: reward = -13.00, steps = 1557\n",
      "19:38:58 [DEBUG] train episode 1987: reward = -12.00, steps = 1718\n",
      "19:53:31 [DEBUG] train episode 1988: reward = -1.00, steps = 2918\n",
      "20:06:57 [DEBUG] train episode 1989: reward = 6.00, steps = 2536\n",
      "20:19:17 [DEBUG] train episode 1990: reward = 8.00, steps = 2308\n",
      "20:32:37 [DEBUG] train episode 1991: reward = 12.00, steps = 2490\n",
      "20:46:33 [DEBUG] train episode 1992: reward = 12.00, steps = 2181\n",
      "20:58:36 [DEBUG] train episode 1993: reward = 9.00, steps = 2222\n",
      "21:10:51 [DEBUG] train episode 1994: reward = 6.00, steps = 2335\n",
      "21:25:39 [DEBUG] train episode 1995: reward = 1.00, steps = 2832\n",
      "21:40:24 [DEBUG] train episode 1996: reward = -4.00, steps = 2833\n",
      "21:54:51 [DEBUG] train episode 1997: reward = -1.00, steps = 2659\n",
      "22:07:51 [DEBUG] train episode 1998: reward = 12.00, steps = 2173\n",
      "22:17:58 [DEBUG] train episode 1999: reward = 13.00, steps = 1906\n",
      "22:28:41 [DEBUG] train episode 2000: reward = -11.00, steps = 1976\n",
      "22:42:09 [DEBUG] train episode 2001: reward = 8.00, steps = 2572\n",
      "22:56:11 [DEBUG] train episode 2002: reward = -1.00, steps = 2741\n",
      "23:07:18 [DEBUG] train episode 2003: reward = 13.00, steps = 2022\n",
      "23:21:39 [DEBUG] train episode 2004: reward = 7.00, steps = 2472\n",
      "23:35:30 [DEBUG] train episode 2005: reward = -1.00, steps = 2629\n",
      "23:45:31 [DEBUG] train episode 2006: reward = -9.00, steps = 1885\n",
      "23:57:14 [DEBUG] train episode 2007: reward = -11.00, steps = 2012\n",
      "00:12:10 [DEBUG] train episode 2008: reward = -1.00, steps = 2823\n",
      "00:22:40 [DEBUG] train episode 2009: reward = -11.00, steps = 2088\n",
      "00:34:08 [DEBUG] train episode 2010: reward = -8.00, steps = 2174\n",
      "00:46:36 [DEBUG] train episode 2011: reward = 6.00, steps = 2572\n",
      "00:54:54 [DEBUG] train episode 2012: reward = -11.00, steps = 1730\n",
      "01:04:41 [DEBUG] train episode 2013: reward = 11.00, steps = 2023\n",
      "01:17:12 [DEBUG] train episode 2014: reward = 7.00, steps = 2607\n",
      "01:25:50 [DEBUG] train episode 2015: reward = 17.00, steps = 1767\n",
      "01:39:29 [DEBUG] train episode 2016: reward = -2.00, steps = 2837\n",
      "01:53:56 [DEBUG] train episode 2017: reward = 1.00, steps = 2972\n",
      "02:03:19 [DEBUG] train episode 2018: reward = 16.00, steps = 1926\n",
      "02:12:24 [DEBUG] train episode 2019: reward = 15.00, steps = 1859\n",
      "02:24:09 [DEBUG] train episode 2020: reward = -6.00, steps = 2412\n",
      "02:35:02 [DEBUG] train episode 2021: reward = 11.00, steps = 2232\n",
      "02:39:46 [DEBUG] train episode 2022: reward = -20.00, steps = 958\n",
      "02:53:10 [DEBUG] train episode 2023: reward = -1.00, steps = 2719\n",
      "03:05:56 [DEBUG] train episode 2024: reward = -2.00, steps = 2599\n",
      "03:19:19 [DEBUG] train episode 2025: reward = -2.00, steps = 2721\n",
      "03:29:21 [DEBUG] train episode 2026: reward = 14.00, steps = 2016\n",
      "03:41:34 [DEBUG] train episode 2027: reward = 9.00, steps = 2476\n",
      "03:54:09 [DEBUG] train episode 2028: reward = 8.00, steps = 2539\n",
      "04:03:20 [DEBUG] train episode 2029: reward = 16.00, steps = 1769\n",
      "04:15:23 [DEBUG] train episode 2030: reward = 11.00, steps = 2400\n",
      "04:28:18 [DEBUG] train episode 2031: reward = 6.00, steps = 2564\n",
      "04:42:22 [DEBUG] train episode 2032: reward = -2.00, steps = 2768\n",
      "04:52:05 [DEBUG] train episode 2033: reward = -13.00, steps = 1929\n",
      "05:06:14 [DEBUG] train episode 2034: reward = 1.00, steps = 2800\n",
      "05:19:22 [DEBUG] train episode 2035: reward = 1.00, steps = 2605\n",
      "05:32:13 [DEBUG] train episode 2036: reward = 9.00, steps = 2463\n",
      "05:44:45 [DEBUG] train episode 2037: reward = 7.00, steps = 2454\n",
      "05:57:34 [DEBUG] train episode 2038: reward = -3.00, steps = 2525\n",
      "06:09:31 [DEBUG] train episode 2039: reward = 9.00, steps = 2387\n",
      "06:18:56 [DEBUG] train episode 2040: reward = 16.00, steps = 1882\n",
      "06:28:51 [DEBUG] train episode 2041: reward = 10.00, steps = 1979\n",
      "06:39:35 [DEBUG] train episode 2042: reward = 9.00, steps = 2173\n",
      "06:51:55 [DEBUG] train episode 2043: reward = -7.00, steps = 2507\n",
      "07:02:32 [DEBUG] train episode 2044: reward = 9.00, steps = 2180\n",
      "07:12:45 [DEBUG] train episode 2045: reward = 13.00, steps = 2093\n",
      "07:26:17 [DEBUG] train episode 2046: reward = -2.00, steps = 2761\n",
      "07:38:18 [DEBUG] train episode 2047: reward = 8.00, steps = 2468\n",
      "07:47:41 [DEBUG] train episode 2048: reward = 16.00, steps = 1904\n",
      "07:59:20 [DEBUG] train episode 2049: reward = 11.00, steps = 2366\n",
      "08:11:18 [DEBUG] train episode 2050: reward = -2.00, steps = 2424\n",
      "08:23:03 [DEBUG] train episode 2051: reward = 6.00, steps = 2326\n",
      "08:32:28 [DEBUG] train episode 2052: reward = -9.00, steps = 1905\n",
      "08:42:50 [DEBUG] train episode 2053: reward = 13.00, steps = 2121\n",
      "08:56:59 [DEBUG] train episode 2054: reward = 12.00, steps = 2588\n",
      "09:11:25 [DEBUG] train episode 2055: reward = 12.00, steps = 2472\n",
      "09:22:28 [DEBUG] train episode 2056: reward = 15.00, steps = 1991\n",
      "09:32:17 [DEBUG] train episode 2057: reward = 14.00, steps = 1896\n",
      "09:41:31 [DEBUG] train episode 2058: reward = 16.00, steps = 1826\n",
      "09:51:02 [DEBUG] train episode 2059: reward = 16.00, steps = 1836\n",
      "10:02:43 [DEBUG] train episode 2060: reward = -8.00, steps = 2229\n",
      "10:14:29 [DEBUG] train episode 2061: reward = 16.00, steps = 2231\n",
      "10:26:55 [DEBUG] train episode 2062: reward = -5.00, steps = 2405\n",
      "10:36:37 [DEBUG] train episode 2063: reward = 15.00, steps = 1859\n",
      "10:51:23 [DEBUG] train episode 2064: reward = 1.00, steps = 2877\n",
      "11:06:18 [DEBUG] train episode 2065: reward = -2.00, steps = 2868\n",
      "11:20:07 [DEBUG] train episode 2066: reward = -2.00, steps = 2742\n",
      "11:32:46 [DEBUG] train episode 2067: reward = 1.00, steps = 2495\n",
      "11:44:57 [DEBUG] train episode 2068: reward = 9.00, steps = 2465\n",
      "11:58:24 [DEBUG] train episode 2069: reward = 1.00, steps = 2709\n",
      "12:08:35 [DEBUG] train episode 2070: reward = 12.00, steps = 2022\n",
      "12:17:53 [DEBUG] train episode 2071: reward = 15.00, steps = 1862\n",
      "12:29:30 [DEBUG] train episode 2072: reward = 13.00, steps = 2297\n",
      "12:41:26 [DEBUG] train episode 2073: reward = -6.00, steps = 2357\n",
      "12:55:17 [DEBUG] train episode 2074: reward = -1.00, steps = 2752\n",
      "13:04:38 [DEBUG] train episode 2075: reward = 16.00, steps = 1852\n",
      "13:16:56 [DEBUG] train episode 2076: reward = -5.00, steps = 2404\n",
      "13:26:14 [DEBUG] train episode 2077: reward = 16.00, steps = 1821\n",
      "13:37:55 [DEBUG] train episode 2078: reward = -7.00, steps = 2260\n",
      "13:47:56 [DEBUG] train episode 2079: reward = -10.00, steps = 1986\n",
      "13:58:56 [DEBUG] train episode 2080: reward = 8.00, steps = 2165\n",
      "14:08:16 [DEBUG] train episode 2081: reward = -11.00, steps = 1839\n",
      "14:19:35 [DEBUG] train episode 2082: reward = -9.00, steps = 2260\n",
      "14:32:11 [DEBUG] train episode 2083: reward = -1.00, steps = 2511\n",
      "14:42:18 [DEBUG] train episode 2084: reward = -10.00, steps = 2024\n",
      "14:55:30 [DEBUG] train episode 2085: reward = -1.00, steps = 2601\n",
      "15:05:09 [DEBUG] train episode 2086: reward = 14.00, steps = 1937\n",
      "15:16:49 [DEBUG] train episode 2087: reward = 7.00, steps = 2328\n",
      "15:27:43 [DEBUG] train episode 2088: reward = 12.00, steps = 2099\n",
      "15:36:47 [DEBUG] train episode 2089: reward = 17.00, steps = 1697\n",
      "15:46:03 [DEBUG] train episode 2090: reward = -12.00, steps = 1714\n",
      "15:56:45 [DEBUG] train episode 2091: reward = 14.00, steps = 2063\n",
      "16:08:37 [DEBUG] train episode 2092: reward = -7.00, steps = 2290\n",
      "16:15:47 [DEBUG] train episode 2093: reward = -16.00, steps = 1412\n",
      "16:22:30 [DEBUG] train episode 2094: reward = -16.00, steps = 1234\n",
      "16:34:44 [DEBUG] train episode 2095: reward = 9.00, steps = 2313\n",
      "16:47:56 [DEBUG] train episode 2096: reward = 8.00, steps = 2491\n",
      "16:58:34 [DEBUG] train episode 2097: reward = 12.00, steps = 1998\n",
      "17:11:13 [DEBUG] train episode 2098: reward = 1.00, steps = 2366\n",
      "17:24:25 [DEBUG] train episode 2099: reward = -6.00, steps = 2372\n",
      "17:34:25 [DEBUG] train episode 2100: reward = -10.00, steps = 1947\n",
      "17:48:21 [DEBUG] train episode 2101: reward = -2.00, steps = 2766\n",
      "18:01:57 [DEBUG] train episode 2102: reward = -3.00, steps = 2653\n",
      "18:12:41 [DEBUG] train episode 2103: reward = 16.00, steps = 1853\n",
      "18:23:46 [DEBUG] train episode 2104: reward = 14.00, steps = 2114\n",
      "18:38:37 [DEBUG] train episode 2105: reward = -1.00, steps = 2649\n",
      "18:50:15 [DEBUG] train episode 2106: reward = 10.00, steps = 2127\n",
      "19:00:40 [DEBUG] train episode 2107: reward = -9.00, steps = 1946\n",
      "19:12:18 [DEBUG] train episode 2108: reward = 9.00, steps = 2238\n",
      "19:21:25 [DEBUG] train episode 2109: reward = 16.00, steps = 1816\n",
      "19:34:04 [DEBUG] train episode 2110: reward = 6.00, steps = 2480\n",
      "19:46:38 [DEBUG] train episode 2111: reward = 8.00, steps = 2440\n",
      "20:00:14 [DEBUG] train episode 2112: reward = -1.00, steps = 2667\n",
      "20:11:50 [DEBUG] train episode 2113: reward = 10.00, steps = 2250\n",
      "20:25:59 [DEBUG] train episode 2114: reward = -1.00, steps = 2734\n",
      "20:39:30 [DEBUG] train episode 2115: reward = 7.00, steps = 2532\n",
      "20:51:04 [DEBUG] train episode 2116: reward = -3.00, steps = 2215\n",
      "21:06:19 [DEBUG] train episode 2117: reward = 1.00, steps = 2790\n",
      "21:15:46 [DEBUG] train episode 2118: reward = 16.00, steps = 1864\n",
      "21:24:45 [DEBUG] train episode 2119: reward = 17.00, steps = 1699\n",
      "21:38:43 [DEBUG] train episode 2120: reward = 1.00, steps = 2433\n",
      "21:49:39 [DEBUG] train episode 2121: reward = 15.00, steps = 1909\n",
      "21:59:52 [DEBUG] train episode 2122: reward = 15.00, steps = 1822\n",
      "22:13:06 [DEBUG] train episode 2123: reward = -2.00, steps = 2447\n",
      "22:24:52 [DEBUG] train episode 2124: reward = 8.00, steps = 2204\n",
      "22:39:21 [DEBUG] train episode 2125: reward = -1.00, steps = 2639\n",
      "22:51:28 [DEBUG] train episode 2126: reward = 15.00, steps = 2166\n",
      "23:03:34 [DEBUG] train episode 2127: reward = 12.00, steps = 2310\n",
      "23:12:14 [DEBUG] train episode 2128: reward = 17.00, steps = 1705\n",
      "23:24:20 [DEBUG] train episode 2129: reward = 14.00, steps = 2188\n",
      "23:39:15 [DEBUG] train episode 2130: reward = 8.00, steps = 2565\n",
      "23:53:17 [DEBUG] train episode 2131: reward = -2.00, steps = 2634\n",
      "00:06:13 [DEBUG] train episode 2132: reward = 12.00, steps = 2151\n",
      "00:19:22 [DEBUG] train episode 2133: reward = -8.00, steps = 2036\n",
      "00:31:52 [DEBUG] train episode 2134: reward = 15.00, steps = 2012\n",
      "00:42:46 [DEBUG] train episode 2135: reward = 10.00, steps = 2166\n",
      "00:52:41 [DEBUG] train episode 2136: reward = 17.00, steps = 1890\n",
      "01:02:53 [DEBUG] train episode 2137: reward = 18.00, steps = 2057\n",
      "01:17:03 [DEBUG] train episode 2138: reward = 1.00, steps = 2845\n",
      "01:27:24 [DEBUG] train episode 2139: reward = 17.00, steps = 2080\n",
      "01:37:52 [DEBUG] train episode 2140: reward = -7.00, steps = 2086\n",
      "01:50:06 [DEBUG] train episode 2141: reward = -5.00, steps = 2430\n",
      "02:01:27 [DEBUG] train episode 2142: reward = 12.00, steps = 2248\n",
      "02:12:13 [DEBUG] train episode 2143: reward = -7.00, steps = 2170\n",
      "02:26:18 [DEBUG] train episode 2144: reward = 1.00, steps = 2832\n",
      "02:37:15 [DEBUG] train episode 2145: reward = 15.00, steps = 2184\n",
      "02:47:47 [DEBUG] train episode 2146: reward = 9.00, steps = 2119\n",
      "03:02:21 [DEBUG] train episode 2147: reward = 1.00, steps = 2943\n",
      "03:15:19 [DEBUG] train episode 2148: reward = 1.00, steps = 2633\n",
      "03:25:31 [DEBUG] train episode 2149: reward = 12.00, steps = 2065\n",
      "03:35:23 [DEBUG] train episode 2150: reward = 14.00, steps = 1999\n",
      "03:44:13 [DEBUG] train episode 2151: reward = 16.00, steps = 1777\n",
      "03:54:09 [DEBUG] train episode 2152: reward = 14.00, steps = 2016\n",
      "04:03:29 [DEBUG] train episode 2153: reward = 15.00, steps = 1890\n",
      "04:12:00 [DEBUG] train episode 2154: reward = 18.00, steps = 1731\n",
      "04:23:19 [DEBUG] train episode 2155: reward = 12.00, steps = 2259\n",
      "04:31:56 [DEBUG] train episode 2156: reward = -12.00, steps = 1723\n",
      "04:44:22 [DEBUG] train episode 2157: reward = -3.00, steps = 2525\n",
      "04:54:39 [DEBUG] train episode 2158: reward = 12.00, steps = 2110\n",
      "05:04:23 [DEBUG] train episode 2159: reward = -10.00, steps = 1925\n",
      "05:15:03 [DEBUG] train episode 2160: reward = 12.00, steps = 2145\n",
      "05:24:48 [DEBUG] train episode 2161: reward = 14.00, steps = 1962\n",
      "05:34:39 [DEBUG] train episode 2162: reward = 14.00, steps = 1977\n",
      "05:44:52 [DEBUG] train episode 2163: reward = 14.00, steps = 2074\n",
      "05:56:33 [DEBUG] train episode 2164: reward = -5.00, steps = 2375\n",
      "06:01:13 [DEBUG] train episode 2165: reward = -19.00, steps = 933\n",
      "06:09:21 [DEBUG] train episode 2166: reward = -11.00, steps = 1637\n",
      "06:21:10 [DEBUG] train episode 2167: reward = -6.00, steps = 2362\n",
      "06:33:42 [DEBUG] train episode 2168: reward = 9.00, steps = 2497\n",
      "06:44:40 [DEBUG] train episode 2169: reward = -7.00, steps = 2192\n",
      "06:50:09 [DEBUG] train episode 2170: reward = -18.00, steps = 1103\n",
      "06:59:56 [DEBUG] train episode 2171: reward = -7.00, steps = 1967\n",
      "07:10:40 [DEBUG] train episode 2172: reward = 9.00, steps = 2169\n",
      "07:21:42 [DEBUG] train episode 2173: reward = -7.00, steps = 2225\n",
      "07:31:55 [DEBUG] train episode 2174: reward = 12.00, steps = 2067\n",
      "07:44:13 [DEBUG] train episode 2175: reward = 8.00, steps = 2468\n",
      "07:55:10 [DEBUG] train episode 2176: reward = 12.00, steps = 2177\n",
      "08:08:48 [DEBUG] train episode 2177: reward = -2.00, steps = 2758\n",
      "08:19:35 [DEBUG] train episode 2178: reward = -6.00, steps = 2191\n",
      "08:28:58 [DEBUG] train episode 2179: reward = -11.00, steps = 1877\n",
      "08:35:49 [DEBUG] train episode 2180: reward = -15.00, steps = 1389\n",
      "08:47:57 [DEBUG] train episode 2181: reward = 9.00, steps = 2464\n",
      "08:57:33 [DEBUG] train episode 2182: reward = -8.00, steps = 1961\n",
      "09:11:32 [DEBUG] train episode 2183: reward = -2.00, steps = 2815\n",
      "09:23:07 [DEBUG] train episode 2184: reward = 12.00, steps = 2336\n",
      "09:35:27 [DEBUG] train episode 2185: reward = 9.00, steps = 2449\n",
      "09:49:01 [DEBUG] train episode 2186: reward = -1.00, steps = 2790\n",
      "10:02:40 [DEBUG] train episode 2187: reward = -1.00, steps = 2846\n",
      "10:11:35 [DEBUG] train episode 2188: reward = 16.00, steps = 1849\n",
      "10:23:28 [DEBUG] train episode 2189: reward = 1.00, steps = 2458\n",
      "10:34:33 [DEBUG] train episode 2190: reward = 12.00, steps = 2280\n",
      "10:49:04 [DEBUG] train episode 2191: reward = 1.00, steps = 3019\n",
      "10:58:46 [DEBUG] train episode 2192: reward = -9.00, steps = 2015\n",
      "11:10:26 [DEBUG] train episode 2193: reward = 1.00, steps = 2427\n",
      "11:23:27 [DEBUG] train episode 2194: reward = 1.00, steps = 2690\n",
      "11:34:24 [DEBUG] train episode 2195: reward = -6.00, steps = 2273\n",
      "11:46:13 [DEBUG] train episode 2196: reward = -1.00, steps = 2490\n",
      "11:55:58 [DEBUG] train episode 2197: reward = 12.00, steps = 2041\n",
      "12:07:52 [DEBUG] train episode 2198: reward = 1.00, steps = 2479\n",
      "12:16:16 [DEBUG] train episode 2199: reward = -12.00, steps = 1750\n",
      "12:25:07 [DEBUG] train episode 2200: reward = -10.00, steps = 1823\n",
      "12:36:12 [DEBUG] train episode 2201: reward = 13.00, steps = 2319\n",
      "12:45:18 [DEBUG] train episode 2202: reward = -10.00, steps = 1882\n",
      "12:53:18 [DEBUG] train episode 2203: reward = 18.00, steps = 1679\n",
      "13:06:07 [DEBUG] train episode 2204: reward = -1.00, steps = 2677\n",
      "13:14:19 [DEBUG] train episode 2205: reward = 18.00, steps = 1700\n",
      "13:18:35 [DEBUG] train episode 2206: reward = -20.00, steps = 888\n",
      "13:31:28 [DEBUG] train episode 2207: reward = 1.00, steps = 2661\n",
      "13:44:36 [DEBUG] train episode 2208: reward = -1.00, steps = 2733\n",
      "13:57:10 [DEBUG] train episode 2209: reward = -2.00, steps = 2617\n",
      "14:06:02 [DEBUG] train episode 2210: reward = -11.00, steps = 1847\n",
      "14:18:11 [DEBUG] train episode 2211: reward = 1.00, steps = 2542\n",
      "14:28:27 [DEBUG] train episode 2212: reward = 13.00, steps = 2145\n",
      "14:37:29 [DEBUG] train episode 2213: reward = 16.00, steps = 1853\n",
      "14:49:48 [DEBUG] train episode 2214: reward = -4.00, steps = 2493\n",
      "14:58:46 [DEBUG] train episode 2215: reward = -11.00, steps = 1844\n",
      "15:09:02 [DEBUG] train episode 2216: reward = -6.00, steps = 2102\n",
      "15:19:02 [DEBUG] train episode 2217: reward = 15.00, steps = 2051\n",
      "15:32:28 [DEBUG] train episode 2218: reward = 1.00, steps = 2746\n",
      "15:43:11 [DEBUG] train episode 2219: reward = 8.00, steps = 2205\n",
      "15:48:29 [DEBUG] train episode 2220: reward = -19.00, steps = 1104\n",
      "15:58:50 [DEBUG] train episode 2221: reward = 15.00, steps = 2171\n",
      "16:08:12 [DEBUG] train episode 2222: reward = -9.00, steps = 1951\n",
      "16:21:27 [DEBUG] train episode 2223: reward = -2.00, steps = 2753\n",
      "16:30:00 [DEBUG] train episode 2224: reward = -12.00, steps = 1741\n",
      "16:42:00 [DEBUG] train episode 2225: reward = 8.00, steps = 2491\n",
      "16:53:32 [DEBUG] train episode 2226: reward = 11.00, steps = 2380\n",
      "17:05:18 [DEBUG] train episode 2227: reward = -1.00, steps = 2462\n",
      "17:14:03 [DEBUG] train episode 2228: reward = -11.00, steps = 1801\n",
      "17:26:22 [DEBUG] train episode 2229: reward = -5.00, steps = 2490\n",
      "17:37:00 [DEBUG] train episode 2230: reward = -8.00, steps = 2186\n",
      "17:49:43 [DEBUG] train episode 2231: reward = -2.00, steps = 2628\n",
      "18:02:52 [DEBUG] train episode 2232: reward = -1.00, steps = 2733\n",
      "18:15:40 [DEBUG] train episode 2233: reward = 7.00, steps = 2660\n",
      "18:28:10 [DEBUG] train episode 2234: reward = 1.00, steps = 2592\n",
      "18:40:43 [DEBUG] train episode 2235: reward = -4.00, steps = 2598\n",
      "18:52:40 [DEBUG] train episode 2236: reward = -3.00, steps = 2460\n",
      "18:58:57 [DEBUG] train episode 2237: reward = -16.00, steps = 1297\n",
      "19:09:55 [DEBUG] train episode 2238: reward = -3.00, steps = 2297\n",
      "19:17:21 [DEBUG] train episode 2239: reward = -14.00, steps = 1515\n",
      "19:23:24 [DEBUG] train episode 2240: reward = -18.00, steps = 1233\n",
      "19:34:10 [DEBUG] train episode 2241: reward = -6.00, steps = 2065\n",
      "19:44:05 [DEBUG] train episode 2242: reward = -13.00, steps = 1709\n",
      "19:50:28 [DEBUG] train episode 2243: reward = -17.00, steps = 1182\n",
      "20:01:46 [DEBUG] train episode 2244: reward = 12.00, steps = 1977\n",
      "20:06:16 [DEBUG] train episode 2245: reward = -21.00, steps = 790\n",
      "20:20:37 [DEBUG] train episode 2246: reward = -1.00, steps = 2653\n",
      "20:38:51 [DEBUG] train episode 2247: reward = 10.00, steps = 2490\n",
      "21:04:06 [DEBUG] train episode 2248: reward = -7.00, steps = 2301\n",
      "21:20:41 [DEBUG] train episode 2249: reward = 16.00, steps = 2201\n",
      "21:31:44 [DEBUG] train episode 2250: reward = -9.00, steps = 1944\n",
      "21:44:42 [DEBUG] train episode 2251: reward = 9.00, steps = 2424\n",
      "21:49:34 [DEBUG] train episode 2252: reward = -20.00, steps = 932\n",
      "22:00:26 [DEBUG] train episode 2253: reward = -9.00, steps = 1854\n",
      "22:14:06 [DEBUG] train episode 2254: reward = 1.00, steps = 2487\n",
      "22:27:19 [DEBUG] train episode 2255: reward = -4.00, steps = 2408\n",
      "22:41:45 [DEBUG] train episode 2256: reward = -2.00, steps = 2660\n",
      "22:56:08 [DEBUG] train episode 2257: reward = -2.00, steps = 2672\n",
      "23:03:47 [DEBUG] train episode 2258: reward = -14.00, steps = 1399\n",
      "23:13:13 [DEBUG] train episode 2259: reward = -12.00, steps = 1677\n",
      "23:27:21 [DEBUG] train episode 2260: reward = 1.00, steps = 2640\n",
      "23:34:26 [DEBUG] train episode 2261: reward = -17.00, steps = 1325\n",
      "23:42:31 [DEBUG] train episode 2262: reward = -14.00, steps = 1550\n",
      "23:54:57 [DEBUG] train episode 2263: reward = 13.00, steps = 2025\n",
      "00:08:36 [DEBUG] train episode 2264: reward = 1.00, steps = 2451\n",
      "00:17:07 [DEBUG] train episode 2265: reward = -13.00, steps = 1635\n",
      "00:29:48 [DEBUG] train episode 2266: reward = -5.00, steps = 2358\n",
      "00:43:52 [DEBUG] train episode 2267: reward = -2.00, steps = 2798\n",
      "00:54:59 [DEBUG] train episode 2268: reward = 12.00, steps = 2205\n",
      "01:07:07 [DEBUG] train episode 2269: reward = 9.00, steps = 2394\n",
      "01:19:37 [DEBUG] train episode 2270: reward = 8.00, steps = 2488\n",
      "01:31:40 [DEBUG] train episode 2271: reward = -4.00, steps = 2388\n",
      "01:43:59 [DEBUG] train episode 2272: reward = -2.00, steps = 2464\n",
      "01:57:07 [DEBUG] train episode 2273: reward = 1.00, steps = 2626\n",
      "02:07:20 [DEBUG] train episode 2274: reward = 12.00, steps = 2019\n",
      "02:20:11 [DEBUG] train episode 2275: reward = 1.00, steps = 2529\n",
      "02:28:32 [DEBUG] train episode 2276: reward = 19.00, steps = 1647\n",
      "02:41:51 [DEBUG] train episode 2277: reward = 2.00, steps = 2636\n",
      "02:52:04 [DEBUG] train episode 2278: reward = 13.00, steps = 2033\n",
      "03:05:15 [DEBUG] train episode 2279: reward = -2.00, steps = 2630\n",
      "03:17:21 [DEBUG] train episode 2280: reward = -5.00, steps = 2418\n",
      "03:29:43 [DEBUG] train episode 2281: reward = -5.00, steps = 2454\n",
      "03:38:36 [DEBUG] train episode 2282: reward = -11.00, steps = 1775\n",
      "03:50:40 [DEBUG] train episode 2283: reward = -1.00, steps = 2404\n",
      "04:01:01 [DEBUG] train episode 2284: reward = -7.00, steps = 2056\n",
      "04:11:05 [DEBUG] train episode 2285: reward = 13.00, steps = 2013\n",
      "04:20:55 [DEBUG] train episode 2286: reward = -9.00, steps = 1968\n",
      "04:33:05 [DEBUG] train episode 2287: reward = 1.00, steps = 2394\n",
      "04:42:57 [DEBUG] train episode 2288: reward = 12.00, steps = 1979\n",
      "04:54:51 [DEBUG] train episode 2289: reward = 9.00, steps = 2364\n",
      "05:04:10 [DEBUG] train episode 2290: reward = 14.00, steps = 1845\n",
      "05:12:37 [DEBUG] train episode 2291: reward = -12.00, steps = 1690\n",
      "05:21:01 [DEBUG] train episode 2292: reward = 17.00, steps = 1680\n",
      "05:33:12 [DEBUG] train episode 2293: reward = 8.00, steps = 2427\n",
      "05:43:29 [DEBUG] train episode 2294: reward = 15.00, steps = 2038\n",
      "05:54:00 [DEBUG] train episode 2295: reward = 12.00, steps = 2063\n",
      "06:06:29 [DEBUG] train episode 2296: reward = -2.00, steps = 2483\n",
      "06:15:51 [DEBUG] train episode 2297: reward = 16.00, steps = 1866\n",
      "06:27:56 [DEBUG] train episode 2298: reward = -6.00, steps = 2397\n",
      "06:37:41 [DEBUG] train episode 2299: reward = 16.00, steps = 1948\n",
      "06:50:10 [DEBUG] train episode 2300: reward = -3.00, steps = 2481\n",
      "07:02:54 [DEBUG] train episode 2301: reward = -1.00, steps = 2511\n",
      "07:14:28 [DEBUG] train episode 2302: reward = 9.00, steps = 2304\n",
      "07:25:00 [DEBUG] train episode 2303: reward = 11.00, steps = 2059\n",
      "07:38:18 [DEBUG] train episode 2304: reward = -1.00, steps = 2621\n",
      "07:52:30 [DEBUG] train episode 2305: reward = -2.00, steps = 2797\n",
      "08:00:18 [DEBUG] train episode 2306: reward = -14.00, steps = 1524\n",
      "08:10:51 [DEBUG] train episode 2307: reward = 13.00, steps = 2085\n",
      "08:22:57 [DEBUG] train episode 2308: reward = 12.00, steps = 2374\n",
      "08:36:44 [DEBUG] train episode 2309: reward = -1.00, steps = 2734\n",
      "08:47:40 [DEBUG] train episode 2310: reward = -8.00, steps = 2155\n",
      "08:57:45 [DEBUG] train episode 2311: reward = 13.00, steps = 1988\n",
      "09:05:04 [DEBUG] train episode 2312: reward = -15.00, steps = 1426\n",
      "09:15:40 [DEBUG] train episode 2313: reward = 13.00, steps = 2084\n",
      "09:28:21 [DEBUG] train episode 2314: reward = 1.00, steps = 2702\n",
      "09:40:55 [DEBUG] train episode 2315: reward = -1.00, steps = 2453\n",
      "09:51:22 [DEBUG] train episode 2316: reward = 13.00, steps = 2057\n",
      "10:03:09 [DEBUG] train episode 2317: reward = 13.00, steps = 2331\n",
      "10:12:10 [DEBUG] train episode 2318: reward = -10.00, steps = 1785\n",
      "10:23:50 [DEBUG] train episode 2319: reward = 8.00, steps = 2278\n",
      "10:33:30 [DEBUG] train episode 2320: reward = 17.00, steps = 1894\n",
      "10:43:24 [DEBUG] train episode 2321: reward = 15.00, steps = 1939\n",
      "10:52:15 [DEBUG] train episode 2322: reward = 17.00, steps = 1741\n",
      "10:52:18 [INFO] ==== test ====\n",
      "10:52:52 [DEBUG] test episode 0: reward = 17.00, steps = 1738\n",
      "10:53:39 [DEBUG] test episode 1: reward = 12.00, steps = 2318\n",
      "10:54:12 [DEBUG] test episode 2: reward = 19.00, steps = 1598\n",
      "10:54:58 [DEBUG] test episode 3: reward = 12.00, steps = 2319\n",
      "10:55:33 [DEBUG] test episode 4: reward = 17.00, steps = 1734\n",
      "10:56:07 [DEBUG] test episode 5: reward = 17.00, steps = 1734\n",
      "10:56:42 [DEBUG] test episode 6: reward = 17.00, steps = 1732\n",
      "10:57:14 [DEBUG] test episode 7: reward = 19.00, steps = 1598\n",
      "10:57:49 [DEBUG] test episode 8: reward = 17.00, steps = 1734\n",
      "10:58:35 [DEBUG] test episode 9: reward = 12.00, steps = 2318\n",
      "10:59:21 [DEBUG] test episode 10: reward = 12.00, steps = 2323\n",
      "11:00:07 [DEBUG] test episode 11: reward = 12.00, steps = 2317\n",
      "11:00:55 [DEBUG] test episode 12: reward = 12.00, steps = 2316\n",
      "11:01:30 [DEBUG] test episode 13: reward = 17.00, steps = 1737\n",
      "11:02:04 [DEBUG] test episode 14: reward = 19.00, steps = 1602\n",
      "11:02:50 [DEBUG] test episode 15: reward = 12.00, steps = 2314\n",
      "11:03:37 [DEBUG] test episode 16: reward = 12.00, steps = 2318\n",
      "11:04:12 [DEBUG] test episode 17: reward = 17.00, steps = 1735\n",
      "11:04:47 [DEBUG] test episode 18: reward = 17.00, steps = 1734\n",
      "11:05:34 [DEBUG] test episode 19: reward = 12.00, steps = 2324\n",
      "11:06:20 [DEBUG] test episode 20: reward = 12.00, steps = 2329\n",
      "11:06:55 [DEBUG] test episode 21: reward = 17.00, steps = 1738\n",
      "11:07:30 [DEBUG] test episode 22: reward = 17.00, steps = 1737\n",
      "11:08:16 [DEBUG] test episode 23: reward = 12.00, steps = 2323\n",
      "11:09:04 [DEBUG] test episode 24: reward = 12.00, steps = 2319\n",
      "11:09:37 [DEBUG] test episode 25: reward = 19.00, steps = 1596\n",
      "11:10:24 [DEBUG] test episode 26: reward = 12.00, steps = 2320\n",
      "11:11:12 [DEBUG] test episode 27: reward = 12.00, steps = 2316\n",
      "11:11:44 [DEBUG] test episode 28: reward = 19.00, steps = 1597\n",
      "11:12:32 [DEBUG] test episode 29: reward = 12.00, steps = 2316\n",
      "11:13:07 [DEBUG] test episode 30: reward = 17.00, steps = 1737\n",
      "11:13:54 [DEBUG] test episode 31: reward = 12.00, steps = 2328\n",
      "11:14:40 [DEBUG] test episode 32: reward = 12.00, steps = 2317\n",
      "11:15:26 [DEBUG] test episode 33: reward = 12.00, steps = 2320\n",
      "11:16:13 [DEBUG] test episode 34: reward = 12.00, steps = 2315\n",
      "11:16:44 [DEBUG] test episode 35: reward = 19.00, steps = 1602\n",
      "11:17:16 [DEBUG] test episode 36: reward = 19.00, steps = 1599\n",
      "11:18:04 [DEBUG] test episode 37: reward = 12.00, steps = 2327\n",
      "11:18:50 [DEBUG] test episode 38: reward = 12.00, steps = 2324\n",
      "11:19:37 [DEBUG] test episode 39: reward = 12.00, steps = 2316\n",
      "11:20:24 [DEBUG] test episode 40: reward = 12.00, steps = 2329\n",
      "11:21:11 [DEBUG] test episode 41: reward = 12.00, steps = 2317\n",
      "11:22:02 [DEBUG] test episode 42: reward = 12.00, steps = 2328\n",
      "11:22:34 [DEBUG] test episode 43: reward = 19.00, steps = 1598\n",
      "11:23:09 [DEBUG] test episode 44: reward = 17.00, steps = 1731\n",
      "11:23:44 [DEBUG] test episode 45: reward = 17.00, steps = 1737\n",
      "11:24:16 [DEBUG] test episode 46: reward = 19.00, steps = 1596\n",
      "11:25:04 [DEBUG] test episode 47: reward = 12.00, steps = 2313\n",
      "11:25:51 [DEBUG] test episode 48: reward = 12.00, steps = 2328\n",
      "11:26:24 [DEBUG] test episode 49: reward = 19.00, steps = 1601\n",
      "11:26:56 [DEBUG] test episode 50: reward = 19.00, steps = 1596\n",
      "11:27:44 [DEBUG] test episode 51: reward = 12.00, steps = 2313\n",
      "11:28:31 [DEBUG] test episode 52: reward = 12.00, steps = 2319\n",
      "11:29:20 [DEBUG] test episode 53: reward = 12.00, steps = 2314\n",
      "11:30:09 [DEBUG] test episode 54: reward = 12.00, steps = 2315\n",
      "11:30:58 [DEBUG] test episode 55: reward = 12.00, steps = 2325\n",
      "11:31:52 [DEBUG] test episode 56: reward = 12.00, steps = 2323\n",
      "11:32:24 [DEBUG] test episode 57: reward = 19.00, steps = 1598\n",
      "11:33:11 [DEBUG] test episode 58: reward = 12.00, steps = 2325\n",
      "11:33:45 [DEBUG] test episode 59: reward = 17.00, steps = 1736\n",
      "11:34:17 [DEBUG] test episode 60: reward = 19.00, steps = 1602\n",
      "11:35:05 [DEBUG] test episode 61: reward = 12.00, steps = 2329\n",
      "11:35:51 [DEBUG] test episode 62: reward = 12.00, steps = 2329\n",
      "11:36:37 [DEBUG] test episode 63: reward = 12.00, steps = 2326\n",
      "11:37:09 [DEBUG] test episode 64: reward = 19.00, steps = 1599\n",
      "11:37:41 [DEBUG] test episode 65: reward = 19.00, steps = 1600\n",
      "11:38:13 [DEBUG] test episode 66: reward = 19.00, steps = 1596\n",
      "11:39:00 [DEBUG] test episode 67: reward = 12.00, steps = 2320\n",
      "11:39:33 [DEBUG] test episode 68: reward = 19.00, steps = 1599\n",
      "11:40:05 [DEBUG] test episode 69: reward = 19.00, steps = 1597\n",
      "11:40:40 [DEBUG] test episode 70: reward = 17.00, steps = 1734\n",
      "11:41:27 [DEBUG] test episode 71: reward = 12.00, steps = 2313\n",
      "11:42:14 [DEBUG] test episode 72: reward = 12.00, steps = 2317\n",
      "11:43:00 [DEBUG] test episode 73: reward = 12.00, steps = 2329\n",
      "11:43:33 [DEBUG] test episode 74: reward = 19.00, steps = 1600\n",
      "11:44:20 [DEBUG] test episode 75: reward = 12.00, steps = 2320\n",
      "11:45:07 [DEBUG] test episode 76: reward = 12.00, steps = 2329\n",
      "11:45:54 [DEBUG] test episode 77: reward = 12.00, steps = 2320\n",
      "11:46:40 [DEBUG] test episode 78: reward = 12.00, steps = 2313\n",
      "11:47:27 [DEBUG] test episode 79: reward = 12.00, steps = 2319\n",
      "11:48:02 [DEBUG] test episode 80: reward = 17.00, steps = 1734\n",
      "11:48:36 [DEBUG] test episode 81: reward = 17.00, steps = 1737\n",
      "11:49:10 [DEBUG] test episode 82: reward = 17.00, steps = 1734\n",
      "11:49:57 [DEBUG] test episode 83: reward = 12.00, steps = 2324\n",
      "11:50:31 [DEBUG] test episode 84: reward = 17.00, steps = 1731\n",
      "11:51:04 [DEBUG] test episode 85: reward = 19.00, steps = 1598\n",
      "11:51:36 [DEBUG] test episode 86: reward = 19.00, steps = 1599\n",
      "11:52:09 [DEBUG] test episode 87: reward = 19.00, steps = 1600\n",
      "11:52:44 [DEBUG] test episode 88: reward = 17.00, steps = 1738\n",
      "11:53:31 [DEBUG] test episode 89: reward = 12.00, steps = 2314\n",
      "11:54:05 [DEBUG] test episode 90: reward = 17.00, steps = 1735\n",
      "11:54:39 [DEBUG] test episode 91: reward = 17.00, steps = 1732\n",
      "11:55:15 [DEBUG] test episode 92: reward = 17.00, steps = 1735\n",
      "11:55:49 [DEBUG] test episode 93: reward = 17.00, steps = 1738\n",
      "11:56:23 [DEBUG] test episode 94: reward = 17.00, steps = 1734\n",
      "11:57:10 [DEBUG] test episode 95: reward = 12.00, steps = 2324\n",
      "11:57:44 [DEBUG] test episode 96: reward = 17.00, steps = 1732\n",
      "11:58:31 [DEBUG] test episode 97: reward = 12.00, steps = 2326\n",
      "11:59:06 [DEBUG] test episode 98: reward = 17.00, steps = 1735\n",
      "11:59:52 [DEBUG] test episode 99: reward = 12.00, steps = 2328\n",
      "11:59:52 [INFO] average episode reward = 14.89 ± 3.03\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAD4CAYAAAAJmJb0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAAAzoUlEQVR4nO2dd3gVVfrHv28avUpAWgQRQbBQIqioqCiCqFh3wbKu4mLDXVf97WJdy+piWcuuFQVlXQusiKioCIgKiGjohJpAgFBCQEIS0nPf3x+35JaZuTP3Trs37+d58uTeM2fmnDl35jtn3vOe9xAzQxAEQUhOUpyugCAIgmAdIvKCIAhJjIi8IAhCEiMiLwiCkMSIyAuCICQxaU5XIJgOHTpwjx49nK6GIAhCQrFy5cqDzJyptM1VIt+jRw/k5OQ4XQ1BEISEgoh2qm2L21xDRN2JaDERbSKiXCL6ky+9PREtIKJtvv/t4i1LEARBMIYZNvk6APcx80kAzgBwFxH1AzAZwCJm7g1gke+7IAiCYCNxizwz72PmVb7PZQA2AegKYCyAGb5sMwBcEW9ZgiAIgjFM9a4hoh4ABgJYAaATM+8DvA8CAB1V9plIRDlElFNcXGxmdQRBEBo9pok8EbUEMBvAPcxcqnc/Zp7KzNnMnJ2ZqTg4LAiCIMSIKSJPROnwCvz7zPyJL7mIiDr7tncGcMCMsgRBEAT9mOFdQwCmAdjEzC8EbfoMwE2+zzcBmBtvWYIgCIIxzOjJDwNwI4ALiGiN7+8SAFMAXERE2wBc5PsuCILQaFmefwj5xeW2lhn3ZChmXgqAVDaPiPf4giAIycL4t34CABRMGWNbmRK7RhAEIYkRkRcEQUhiROQFQRCSGBF5QRAcZeXOX7E8/5DT1dDkgxW7cKi8Gt9uLsLGvZHTgIpKqzD65SWoqKlzoHbauCoKpSAIjY+rX18OwN7BSCNs2V+GB+esx/zc/fh+q3dWfnhdhz69CABw///W4rXrB9teRy2kJy8IgqBBeXUtAKC0qjZq3kPlNVZXxzAi8oIgCBrU1DEAID01ulyy1ZWJARF5QRAEDWrrPQCADB0i70YSs9aCICQE+cXleH+F6qJFOFhebXkdmBlTf8hHUWmVZr6D5dV47bs8MHv744WHK/Dad3l4bv4WAMC6wpKoZanNCg3n281FWJZ3MFC/4x+YhxcXbNW5tzFE5AVBsIzL/70UD83ZoLr9vllrLa9DfnE5nv5yM+58f5VmvntnrcWzX2/Bmt0lAIAb3l6BZ7/egvV7jgAASqvM85y55d0cXP/2CgBA7t5SeBh4edE2044fjIi8IAiWcbSmXnN7ebX1Loe19d6eeXkUkS7zDax6fD35gzYNorLFhnwReUEQBASLrdfoUuOzxVtNisUqLCIvCIKABs+YFJ9hvdYmkU9N0WvJjw0ReUEQXMGc1YUh32vrPXhq3kYcPhrdbFJaVYsnv9iI6rpI85C/h76lqAxPfL4RH/28C5Nnr8Ori/PQY/I8rC88gp4PzMNany1+w54j+M0by3WZUfaUVOJ538AsAKzY8SsOBA3wFh6uwPPztwQGc8P5dPUepJK1Ii8zXgVBcAV/nrkWVw7sFvg+P3c/3lqyA4fKa/DCbwdo7vvCN1vx7o8F6JXZEtcNzVLNN33Zjoi0y15ZGvL9kbm5uut85/urAg8HP//38TrMuGVIYPu6wiO47LQu6HNsq4j975m5BovuG667vFiQnrwgCK6k3uPt/dZ6onep/aaVeqtHMcOoro18cwg281TXej+zxjQpq3vyZq3xOp2IDhDRhqC0x4hoT9hqUYIgCLaiJbBWYFSzE8Um/y6AUQrpLzLzAN/flyaVJQhCgqFmkzbt+JYePbExReSZ+QcAv5pxLEEQ3I/Hw3jss1xsj2G90ufmb0b23xcEzDHBFJdVY/LsdSipqMGfPloDAPh87V58vLIwIq8Sj3y6IeDv7hTL8g7F
1C5WYbVNfhIRrfOZc9opZSCiiUSUQ0Q5xcXFFldHEAQz2H6wHO/+WIA//CdHV/7gjvyri/NxsLwGm/eXRvTwn5q3ER/9shs3TFsRkn7///TPjH1rSejgqs1megDAbe+tNLzP+CHdLaiJtSL/OoBeAAYA2Afgn0qZmHkqM2czc3ZmZqaF1REEwWzi0c+M1JTAbFQzjudGjIwHDD6uvSV1sEzkmbmImeuZ2QPgLQBDrCpLEAS7iX+wMD01RdGvHQBSLPY4cRMJG9aAiDoHfb0SgHqUIkEQkholHUtPi+zJ+0l0ifefFRk4E6vO2SwXyg8BLAfQh4gKiWgCgGeJaD0RrQNwPoA/m1GWIAguwqdmry7OwwcrdhnaddiUb7Hj4NGQtLlr9gIA1hYeMVYNm+08/zIxYqTVLp6mzHhl5vEKydPMOLYgCO4j3Jrij7muNds0mXhhwVb8cURv1e3+5jEi4FZZqGTGqyAIMaNXwqz2kw8WSDeYeoycbcLa5AVBEOwiWCjd5KFjyCYvPXlBEBIVJ4XXCT/5QNk6ztzq6kkUSkEQDBOwOaso6PPzt6DW0xCo6+KXfsCcO4ehuMz6NV39dSupqMGAJxagWXqq5WVG4+OcQuwpqVTcdv7z3wEw1us3goi8IAiGoSi2hVcW54V83158FN9vLcbizQesrBaAhp7xNxuLAACVCpEi7ebtpZEhju1CzDWCINgCM1s+ABtMTZ31KztFe9gZO5ZphwpBRF4QhJhx0yCnH79W2iHydj60YkVEXhAEw8TS6bRWDyMPbtcarUq4SfzFJi8IgiFm/rILD3/qjVJiRMvMmNl54Qvfo0+nVlix4xD6HtsaJ3dtg8mj++LDn3eH5NtbUol/fLU57vK06DF5nub26rp6bC1yPuSw9OQFQTDEX2evV405o4UZndu8A+WYt34fDpbXYGneQbzxfb5ivtW7SuIvLA6ICPuPVEXPaAMi8oIg2IKLLBiNChF5QRBixogJhuHOgVorcJNNXkReEARbcJPwNSZE5AVBsAWrJP43by6PSHN6zZH84qOumIQFiMgLgmAXbI1d/ucdv4Z8d1rg/RSVWh/CQQ8i8oIgxIzdLpS6ynGJVSjF4MPGzNmzIfUw4yBENJ2IDhDRhqC09kS0gIi2+f63M6MsQRDcgyGRt1F83dCZN7pOrauX/wPwLoBRYWmTASxi5t4AFvm+C4LQSLFL44nc4cXjhgcNYJLIM/MPAH4NSx4LYIbv8wwAV5hRliAI7uX175QnJwHx9+T7PvJVfAewG5eovJU2+U7MvA8AfP87KmUioolElENEOcXFxRZWRxAEq3lp4VbVbfHa5Ktq9ceicYO+GjXXWIXjA6/MPJWZs5k5OzMz0+nqCIJgEczuMKPYRWMQ+SIi6gwAvv/WrxYgCIJraUwCD7jHldNKkf8MwE2+zzcBmGthWYIguByZ8aqNVQ8FU0INE9GHAM4D0IGICgH8DcAUALOIaAKAXQCuNaMsQRCcI3z5vj0llSEhdxnqYv7o3FwrqxZgef4hvLRwmy1lJQKmiDwzj1fZNMKM4wuC4A6+2rDP6SpEZcWOcEe/xo3jA6+CICQOFMVvheCeGadO45Z2EJEXBMFUXKJtjuMxqPLRHqCxIiIvCIJp1HlE4t2GiLwgCLqJ5gFS72HxonEZIvKCIJiKSLwXtzzrROQFQdCNWyb4JAJ2hVaOhoi8IAim4pYerOO4pB1E5AVBMJXaev2BxJKZkspaQ/mteksSkRcEwQDRlej2/660oR7u5873VzldBQAi8oIgmMySbQedroIQhIi8IAi6kYHXxENEXhAEwQW4fY1XQRAEwYWIyAuCoBux1iQeIvKCIAhJjCnx5LUgogIAZQDqAdQxc7bVZQqCIAheLBd5H+czs/hVCYIg2IyYawRB0I24UFpHIs94ZQDfENFKIpoYvpGIJhJRDhHlFBcX21AdQRBixaqFLQTrsEPkhzHzIACjAdxFROcGb2TmqcyczczZ
mZmZNlRHEASh8WC5yDPzXt//AwDmABhidZmCIAiCF0tFnohaEFEr/2cAIwFssLJMQRCExMQaU5jV3jWdAMwh74hCGoAPmPlri8sUBMEiZOA18bBU5Jl5O4DTrCxDEAT7EI1PPMSFUhAEIYkRkRcEQUhiROQFQRCSGBF5QRB0QzLyahmJPONVEARBcAgReUEQhCRGRF4QBCGJEZEXBEFwAbLGqyAIjiPjromHiLwgCLqRUMOJh4i8IAhCEiMiLwiCkMSIyAuCICQxIvJCQrB48wFs2V8GAPh87V4UHq6I63jMjPeWF6Cipg4AUFfvwTvLdqCmzhN3Xa3EX++j1XVR867ceRi3zshBvYcDaZ+u3oPHP8/F20u245Vvt0U9xvzc/XhwznrU1nvw+dq9mL5sR1z1F9Sxajax1fHkBcEUbn73FwBAwZQxuPvD1chs1QS/PHRhzMf7dvMBPDI3F1uKyvD3K07Bhz/vwuOfb0RlbT3uPO8Es6ptOku2HcQjc3OxYU8pnrnmVM28V7/+IwDgfzm7MW5IFgDgnplrQvLceGYPtGmWrnqM295bCQDIat8cU77aHEfNBaeQnryQkBSXVce1/9GaegDA4YpaAEBplbdnXFoZvYfsJP43j8MVNbr3Kdfo9XuCevlalFXV6i5PcBeWizwRjSKiLUSUR0STrS5PEIzgf0EW/29txHUycbF6jddUAK8CGA2gH4DxRNTPyjIFwQj6+rGCYD2JOuN1CIA8Zt7OzDUAPgIw1uIyhSSG2VpZZpF9IcmwWuS7Atgd9L3QlxaAiCYSUQ4R5RQXF1tcHUEIRYwQQrJjtcgr3UMhXSVmnsrM2cycnZmZaXF1BMGL1W8EiYLesQgZs0hcrBb5QgDdg753A7DX4jKFJMYqbU6cgUVn6inPxMTFapH/BUBvIupJRBkAxgH4zOIyBSF2XC9mrq+g4DIsFXlmrgMwCcB8AJsAzGLmXCvLFJIbvRJXVlWLp7/cFHUGq3+WoZgjtNm0r9TpKiQ9Vl2Dls94ZeYvAXxpdTmCEMwLC7binWUF6NmhBcb7ZnvqQfrJyizafMDpKggxIjNehYRC74CpvwdfV+/uWDSCYDUi8kJSIuYXQfAiIi8kNdH6/RT2XxCcwqqOiYi8kFDotZn7XSKjWXc+W7s3JBRvMvrPb9pXprotp+AwAGDRpiL896eddlVJsBEReSEpMdIrWrnzcMKZd4zUd/aqQtVtt/4nBwAwYUYOHv50Q7zVElyIiLyQUBjtaKv1zNWOk4QdeaGRIyIvJCVGO+aJM+NVEIwhIi8kFEajRErHXGjsiMgLhvhi3V688M0Wy8t5/PNcLN4SOQHHv85rNKxaL9NMnpq3EYs2FTldDcElWPU2KSIvGGLSB6vxr2/zLC/nnWUFuPmdXyLS7/lojeVl28VbS3Zgwowcp6shaPDuzac7XYW4EZEXEgqj5hfVAVaVI4l5R/CTkZaC8/p0dLoacSMiLyQURv3YdfvVu9+6A0C8fwTjiMgLCYUVoh384BARFRxDZrwKjZGPVxbi7g9XB77vPFQR+FxWVRt1f6M9/+nLduDVxbGPOTAzbpy2QnHQ2C6+yd2Pm6b/7Fj5grsQkRdczf3/W4vP1yovJrYs75DqftE8FYK1P9wT57n5sXsPVdd5sGTbQdz+3sqYjxEvE99bie+3ynrJghcReSGBca9txY01S8a4PFZihfVkUFZbC46qjWUiT0SPEdEeIlrj+7vEqrIEIRx/51yPron4CcmM1StDvcjMz1tchiBEYDisgdnuNS58bsizzHmcmKQn5hohqQn2h7/9vZWY9MEqnPb4Nyg8XGlJeXbdw0ZnR579zLeo9SivkrVmd0ngc129B8OfW4yvN+yLp3qCCh6NJ61Vl47VIj+JiNYR0XQiaqeUgYgmElEOEeUUF8tgkWAE9dtCSWy/zt2PL9btw5HK2pDBXCLzJ5QbjbFjNYWHK3GovEZx2yvfbgt8Lq2qw85DFXjgk/V2
Va1R0appuu1lxiXyRLSQiDYo/I0F8DqAXgAGANgH4J9Kx2DmqcyczczZmZmZ8VRHEGLCTJu8m00iet4y/Fk8Lj6PRKZDiwzby4zLJs/MF+rJR0RvAfginrIEIRbcLLpupGHAWhouUWZBR8NK75rOQV+vBCDLzgi2kQhRKO1GzSgVMmfAv2yiHRVqjGhcllZds1Z61zxLRAPgvV4KANxmYVmOMOCJb3D3Bb0x4eyeTlfF1dw4bQXat8jAy+MG2lJe/0e/Rr1PuRjAofJqDP77wpA84fdT+Pf7Zq0NLJt387Ae+Ntl/Q3VIVpHOJae8orth3DH+6sM7+dHTUMqauqDMnn/lVXV4ZZ3I6OAComHZT15Zr6RmU9h5lOZ+XJmTrrh+pKKWjz5xUanq+F6lmw7iLlrlGetmg0z42hNPapqPb7vwLo9RzT3UepBBa+L+s6yAgPlm5svmGlLdxjfKQi1fmJhSUOoiOCm+Hazc6EZ7OSc3h1sK8uJFcjEhVJIKpTEMyM18jIPzsfMFnjXxLfdElROMtiz0klTfOc2TR0pt1Nr+8p1woooIi8kFeEaxWCkpdh3Z+l1nXTTwGZIXdxTLcexotftxEiRiHyMuOkmFRpQ+l3S0yIvc6fHZWNxUYz7ipNL1nGkJ59AJKIf8cgXv8fN7yR3CNpL/rUk5DuzsrkmmN9O/Ql7j1Rp5rl1hr5BSKVnf4/J8zDlq82B7/f/by0GP7lA1/H00veRr/C3udoObGqXbPC5n/bENybWyhiNvd+UqDNek5ZE7MlvLSrH4i3JPat4a1F5RFqKju7Tqp2HNbcv3GRsEDL8+njj+/zA549XFqKsus7Q8aJRVevBjOU7o9TJ1CITmtO6tTG8z60meNHJwGsCkYg9+caKnldkrZgiRrDysoi3im4LtRCOnfUb2f9Yw/s8fGk/C2piPSLyMWKWKAjWo+e3Mvuh7carQzomzpPigOKKyAtJDTPr6gGb1pO36eEfywBeIpoYncSaQVJjQfXMwOp48knJKY/NR9e2zZyuRsxc/spS7DlciZWPXOR0VRT596Jt0TOFUVJRgwFP6B/MDNe7+ji7uWt2l+CKV5eFHL/H5HkxHWvCu79g0eYDmDnxDPx26k8AgHsu7I2Fm4pCjq+EVplu13in6te9XXPbyhLvmgShrKoOm/eXOV2NmFlXeASHjiqHnXUDby3ZbnifPSXK8eGZlcUjXNTjNWV8b+KA9iLfTNPgGacvLTT+4HMzX/7xHKerAADoe2wr3HV+L808yx+4AF/cfbZNNTIf6ckLriMWvdXyWlAa0KupD11AwxOnylsyaGj2YlUu6sn369La6SoAAM7v2xFpUVxsO7dphs5tzHlzl8lQghAjaq/BDGVxqw0XeTcpoEW437tGsAIR+ShU19VrDljZOGPeMDV1nrh7qE4QS5Oqijwri0dlcORF6BP5cg3f9nieER4P42h1XcR1ptenWu9vXBF2zoI2VtzaYpN3GUer69Dn4a/x4oKtqnncHLf8xIe/wiNRZkEmNg3ipmmuUVDg0qpQwdajkyf/bT6W5R1U3PayjsHiI5W1iukTZvyC/n+bj2e+3hKS/u6P+qJO3v7flbryjX55SfRMNtD32FaK6Xa+TGW19w629jhGfdB10HGKK5bGhdbEPKsmSonIa1Ba5b0pZ+bsdrgmxvEL2/srdjlcE+PEZJPXuD/M1A41kdfDryqD3f5ZyLPCrjN/uGQlgs/3m41FqvncyMzbzlRMt/Ot+NJTO2PWbWfiN9ndVfO8ccNgXcf6+Hbl83EL8a7xei0R5RKRh4iyw7Y9QER5RLSFiC6Or5rO4H/qavUw3GquaQQm5hC0fgY9/uF6f8YEtH65ihF9O6JNM+XFrPWEnzALIsKQnu0138RbNNHnl9Irs6X+cnXnNI94vWs2ALgKwJvBiUTUD8A4AP0BdAGwkIhOZOaEMgr6f3+tG9uJWBRCJGo3K0PfZCi9WDl42divJBdbPjUxUm8n
zLtx9eSZeRMzb1HYNBbAR8xczcw7AOQBGBJPWU4QWO+SGSUVNThUXo3qutDnVE29J+Bzzcw47HslV3s1j0Z1Xb3mAJ9ewqWopi7y1T+8jkWlVSirUrYbR6O6rh5lVbU4UlGLo9V1qKipw69HvW0WjYqaOlTVGn/+MyPQ3mr3TmVtfcDsZgbMsf9GB0qrNK+LQ0drDA2U7y2pjKnd3IqdPXk3kmgzXrsC+Cnoe6EvLaHwm2IOHdWeTfmPLzfh4Uv74bXv8vHc/C146JKT8NSXmzD7jrMw2ODgzbVvLMe6wiMomDImnqqHmCjyDpRh8uz1EXkGPbkAH9w6FGed0AElFTUY+vQiAIip7PFTf8KqXSUx1bXfo/PRqmka1j9mzKr39tIduOP9VVjyl/NVe8Fvfr8db35vfHKVGh4P46rXfkTu3tJAO4U/+NXwz17V4srXlkXNAwD5xeU4a8q3uvK6iVO7tVXd5iaN79fZfj/+jq2aWHLcqD15IlpIRBsU/sZq7aaQpthFIaKJRJRDRDnFxYkZBnfuWu/6pQt8A2CfrN4DANi4r9TwsdYVaq9Hqpfgxt5efBQ5KqF01/rKO1wRX283VoH3U1ZlvGe80ndOuw9XxP8arHN3DwO5e0N/12qFt6RYWavz9y8qjf6G5EbuvuCEiLRHfNEdnezJZ4YJ7CwDg6lGTLZqp5jVvjl6d1L2OoqXqD15Zr4whuMWAggetu4GQHElZ2aeCmAqAGRnZ7tqWEtvZerCJtb4e9FOdkwa08Ar2Tgy4vYJRW4nRcFToX0L70Cskz35Ns3SUVzW8OBsqXPQ1SxOtEjgAetcKD8DMI6ImhBRTwC9ASTckkR6hbK23meTD0t38qINFiM9vdxEjlBIZF9bJ3AzuRZ/mzrZk7dr0mDCLRpCRFcSUSGAMwHMI6L5AMDMuQBmAdgI4GsAdyWaZw2gv9dWXl2H3b9WBK7WPYe9wbJ+LfcOslXX1WP/kSpU1dbjQKn2MnPhHD5ao3swNPj4RsUo+PW/osbcVYuC2bDnCI5U1uJAWZX6oKHBuhPsu3m2FjUEpisqrUJlTT3KYzA1xYuVv5Hd+PXVyU5RvYtDRMdLXO8kzDwHwByVbU8BeCqe4zuOgd/9nGcXBwZr/Eu7/XPBVtw9ojfunbkW89bvwzm9O2DJtoOGBjYHPrkATdJSsOXvo6PmnTDjFyzLOxRxfD3X1fi3GgYFL/v3Uiy677yIPJ+sKtRxJHU27DmCS/+9NPB9UFZbfHLnsIh8Rm83IrLNjPJj/qHA56FPL8JJnVtjUwxjL/Hif3tMBjwuMG+ed2Jm1OUTVTFQ8WNaZsRWRhzIjFcNjN5Gal4W83P3AwCWbItttqTegb1leYeiZ9JBfvFRxfTVcQ6u7jxUEfJdbbDWqOmIKH4zSqwC44TAJyrhvdhv7xuOFQ+OCPzeVphrlj9wAWbddib+MqqPZr5HLu2HawZ3M738cDJbNsGPky+wvJxgJNSwBkaFQ6135b147e15Gal7ItvjhcQhfDDzeN9MUY+FNnl/mOBo81bSUlPQuU1T08tXoovNCw5JT14DoyYApQlHACK6iXaIaujAa5S8LtN4o+6Q9j9ChVhQ+1XZBTb5eHB7vUXkVThQWhXiUqWH8IUoAul14bHLvcvVqa1mBKiHtT1QVoUDZdqDtwUHj4bsr2VSINIvkPtVBo2LdA4mH40yS3TnIW+9YzHXHK6Ib6WrAwnqd55IqD28AzZ5t6ulCThxjmKuUWGIb/anEZR68koC62HG2c8sRnl1neog7HVv/YTPJkUuOTbkqeizUs97/juc2KkhaNLz36iHSmbWv2DGApVoh0N1ttVfZq/T3D78ue9warc2MfTKCZPeX2V4r2DKTAglIcRGf98qUcN6HePoGIee2ekDs9rGPTblJ/u4dqqTFM1EevImoiSWSr1fD3PU2CfxznzdWlSuO6+bzDXrCo8Yrg8RsPeIMddUQT96ptu3Upg89NmkSM8pJQZm
tcPqRy7CmFM7G66bfqJfVOf16Rg1z8yJZyrGoP/h/84P+X7VIOUoLv5+/IbHL8YHfzgjanlmICJvIuGLQwNAqsLrmZtE1WuucVGFYiD5X/KdpWNrHSLfNFLkj2vfIuS7lqWiXQv7XQtjISMtBc0zQs+VALQMO//mGamax2nZJA0ZafbIr4i8iSj15JU8BpQeBk7B7K6HDmD8odMYbLluR/E3cNXP4qrK2IrY5BUoOKjsJx4NJRfK8NV+AHUb+NrdJZrHD4+RAwBrdpegZZNUzVWEtHjm680YP0R9dRzAO8sz/4B+848RCg9XRM8kuB5FjXeVrprXk9FzJDetMyEir8B5z39n2rE+WxsZl01JkI9U1mLsq+phZhdsLIrwMy4qrcIVGvvoZcy/lkakMXOgdzbyxR/iLkONs59ZHJFm9IHl1tW5koUOLaOba5TeWMPTrFoYW+1N9KxexyimN0vXNqUYrwOB2ROWppz3lG5tTC1bD2KucQClmbFKvfRg9h2pRFXYfrGE51VCyZXTbSYcLdzUa0pG1JbrC0bpQRuepNes1q9zaww/MVNX3m1h4T4u7t8p8Pm9CUMDn/3X84UndcL6x0bqOnY8qJ2pldEm1RCRdwAlm3xaivZPQVCaRGWdEut1qxQEQLknH6u5Jj0tRde+RN6Zqmr1SFV48qSmRO5jFKV5HG4eFxKRd4BYgksRETzmrU0RlUSSeBffX40Hhd8gnjAFevY0cnyrrxE3hwYRkXeA/Qo+3dE8Sl7/Lj+kd+3xmLtAdTjRBoFX7zpsOGyyVQSH/xWcQY/gGtFZPZe2kbEYK+8Vt/fsReQd4IZpKyLSol2Ee0oqQ2zn7/5YYHKtQrnmjeWa26987Udc/JJ1A7JGuHfWWqerkNRcflqXqHnGD8mKSAvXuWi6l9XeO8lo/Ona3l5+bh/eSzG9XfN0DOjeVnGbFeM3LvKIVkRE3iXouU6CQw7vO1LpuEkl3nVhhcRgxEmd0CfKgOE4BWE2aq45pmUTFEwZg3EKD4xgvrj7bBRMGYP7RnrDBxdMGYPXrh8U2L760ZH49C59s21NgWM3b069cbCpVVEi3pWhriWiXCLyEFF2UHoPIqokojW+vzfir2pyo8emFzxg63XbsrJGgtBANHOi0iCnVQYLpeveyXvB7TPG4/WT3wDgKgBvKmzLZ+YBcR6/0aDnMgkReeuqIgiGURT5iJ68gYHSGOvhhCmcWcld1P56qBHv8n+bAHcNMsTKnpJKFBw8imEndLC13COVtZi+dAdqFfzk//CfnJDv/wlanmzBpiKc5Ftu0CpKKmqirmb10kL1CJdC40EpRlO47huRCa1Oj1bP2Y4efXgZHo6skZvesq2c8dqTiFYDKAXwMDMvUcpERBMBTASArCxtW5yVjHzhexytqTe0/qoZnPb4N6rbwkP7HixviHm+vfgo7pm5xqpqAQBGv7wE+6JEd3xp4TZL6yC4A6V5HF3aNA1E/0zR1ZM3ByUBjbowjiU18dKyaVrMExNP8w0QXz/UOu2LapMnooVEtEHhb6zGbvsAZDHzQAD3AviAiBS7ncw8lZmzmTk7M1PfLDcrOFqjvD5rYyaawCc6H9w6NGqes4Pe7P46qq+V1XGUd28+XXO7X+ODwweP7H8sAO/6qHbiok4yCqaMQZO0yDAJep9vnVo3RcGUMTi/b/Qwx7EStSfPzBcaPSgzVwOo9n1eSUT5AE4EkKO5oyDYic5ZlX6ihZ5IZgJL9Ck0mt7+uln9eiUnBTeZR9yGJS6URJRJRKm+z8cD6A1guxVlCYJd1LrdIdoG4ok2acR6o5VV61eIVoYZFiQ93jRueujE60J5JREVAjgTwDwimu/bdC6AdUS0FsDHAG5n5l/jq6og2E+wXVlpcLyxYLdoaQ68amx0YuDV7cTrXTMHwByF9NkAZsdz7ETGrrUbBXtJZnPNad3aam6/58LemPjeSvTo0ALn98lE62bpEWaT1BSKCL43om9HlFbV4pcC
M+8HpTACUfbQKcwj+3VCBx3LHSYSMuPVAv7v4j5OV0GIkWAvhz+c0zPEbBBLYDk7WXjvuTHv265FBj6586yI9KsHdQPgHWQtmDIGLZuk4Z2bh+DlcQMDefxt9MYNkbM3p/3+9EBeK0NCm9W7nvq7bDx95Slxl+8mr3IR+TDMiCbndjEQvCiJTvBUfObQm9Xt5pp4oj4Cyr7uWocMv8rVAob585lmk9e4vdTKsFt03WTSEZEPw4z1V90uBoI6wWIQfiXUufzhHa/IK8aE18gf8Ljx7Rdv+XqJ5Vdwk+jaTaMV+SXbirEsL3I259VRoi/qoUZEPmGJ6MkHbXP7w1sptIARlNatMaTbJmq8VQOv5njXJBaNVuRvnPYzrn87MuRveBx1tZClfpqmRzbh0J7t46ma4BKuPyMrxLvmD+ce72BtotOpdVM8Oba/7vzDTghdA7VXZkt0a9csxCZtxI6ultNvArXST95NNnA1/jiityPlNlqR18undw1TDHVw3dAsFEwZg81Pjo7Y1rZ5BgqmjMFjlxmfCdi+RUZM9RSMoyQMwT35XpktQ4TJilhBr1w3MHomHRRMGYOMtBSc1yd05mS4E8Dcu4YFOi73XhS6rWl6Kpb+9QJcNzQL/7jKK/TaNnkOyaNmrgk361iBneaYWMbt0lII9150ogW1iY6IvIXEclHbZdcUlDG60EW8mD2/Sk99G6w60QN96VprNXBc8xrLqslQVuHmcMMi8hYSywWXFqddVYgPu1vf7LVBdS3D58uj9YBpWGpS/XhOuQ3G0mROi7CTfbekE/npS3dgy/7QNT8XbSrC/Nz9ivln/rIL17/9kyV1ieV3FY13lsib0dofxGOxyCuGIfD91xzA1Ng/8oAUclwzsCrUsBm++kpFWDkHIF6STuSf+GIjLvlXaFTjCTNycNt7KxXz/3X2eizLO2Ra+e9NGKKYnpGWguYZkdHqwklNdfZiOb1HO0fLt5KR/Tppbh/V/1jcPKxnSJpf5Lq1a2ZKHf4yqg+GBA3MX9C3Ex4Y3Retm5oT9Tu4k6C2tNxjl/fHoKy2OLVbGzx15ckYPyRy6b5LT+mMU7q2we3nKq+jCnjXWO3fpTXGnNIZgLp50ojpJxrHHdMcA7vHfo2a2aMPXvtW6bjPX3ta4LOTD4GkEnn/q68Zvu6xUDBlDM7p3RAuOfiif/TSftj4xChFc0zw4JtS2FKlcjLSrPnpJo8+yZLjOs3JXVtj6u+yQ9KCf4mCKWPwxo2D0aWtspjHGk43fNB+YPd2mHXbmYHvbZql47bhvbDusYsNHzv/6UsiE4NOyh8KOJyTu7bBJ3cOQ9P0VFw/9Dj846pTI/K0a5GBz+8+G1nHNFctv3v75pj3x3MCzgJmmiTUDvXJHWehmUZnyU6zSDRvmWsGd7OpJtoklcjbqe0G3mIBNLyWK9lMg18x9drkrYqjEq+vtVuJ1Sqix7Rh6HgWC2GEucbGHqQdJdXH+UO42axiFUkl8nUe905WCbxdmHSNWfVAU5rangwotZce76eGLO7znojmAmo3SqtDAZGulvHgipAhSlWIVi0Hbysrl/+zjfzictw7cw3KqhuW4OoxeR5eHjcAYwd0DaR9sW4v7p25FiNOin8VFj2XWnCvwS8ybu8oK816TAbi9WJx47R4pYeUk9dXtKLN6EXX1sXWkbPi9zMrFo/VJMUtXVJRg7WFR7C9+GhI+p8+WhPyfdIHq1FT78FXG5Q9bcJ59poGW+Vz10TaLaMxdkAXZB/XDgO6t8W12V773Ad/OAMXntQRF/dXHgTU61s//57YIw4CDWtLhpOeGnlJPDzGHXb6gVlt0b9L9AlJd54XOVjoN5d1DbK5D8xqC8AbDjeYN24YhEnnnwDAa4sfffKxisuzXTWoa0RaNPy/7vTfZ+O24aEzaMcP8UbAHNKjYWC2T6dWgc/+gdThJ2YGomX+8YIT8MYNgwJ52jRLDznmdUOzcFHQgHM/He0XK2rXbrd2zXHV
wK547fpBituVePzykzH65GNxy7CeePt32bhigHeQs3t75TGCESd1xKj+x+LBS6y/Vl+5bhCuGNAFPY5p0ZDoO/Wm6Sm4qF8n3OW7ftxAUoh876AbwSxeuW4gfpPd4HVwbXaoB4IeKW7RJA0f33EWPr1rGFo39d58g7La4e2bTsebN2YHvBK0jjtJ5WLpc2wrFEwZgx4aA2N+mqWHDlQ9POYk/FUlHHJThYHfW8+JnM7/v9vPxDm9O0SkG+U+A7MAX71uUGDAuVUT9ZfQKwZ2RcGUMbjnwoaBMf+b1LLJFwTS0lNTUDBlDKb9PnR901End8b9vvbp1q45Xr9hMJqGtWH7Fhl44TcDQtJm33EWbh/e8IBZ8eAI1Tp6vWpCBem0bm0AAD07NIjHy+MbyvCH+51xyxA85Qs9cO/IPhh1csN1FC60bZql462gAWelh7hZqPVPUlMIL/x2AE7u2kb3sbKO8bb7o5f1w4X9OuGlcQNRMGWM6phR0/RUvHHjYHRrp3w/mGnF6telNV4aNzC0Lr7rq2UTb3tnuigmfbwrQz1HRJuJaB0RzSGitkHbHiCiPCLaQkTGXQcMkLB2ZD2zCaPk0dPzV/LFVnt7TU+zty3V7LhKBJ+qlqup0haz/dGVykhPpRBXugyDgqrkm+5GM5Eabr4L7WpHt4Q6Dibex/oCACcz86kAtgJ4AACIqB+AcQD6AxgF4DX/mq9WYIVHiFtG4c2I96F0gatd9Gk6jfJOiE/wb6LlhaTUZnbUNy0lJeTpqeTmqvV7+h9EVsZ4sZKECMmRAFU0m7hEnpm/YWb/aOdPAPyOoWMBfMTM1cy8A0AeAOVZQiagJfI9Js+zpEwzrmelN5C0sB5qtLcUPQ+48F4sEWm+WuuBmU0JwWBEGIga2kPrYeSvVvCxzb63ldrJ25NvIPy31H/sGCvlMG4WeX/VrK6h2i3hpLXBTO+aWwDM9H3uCq/o+yn0pUVARBMBTASArKwspSxRsaIBL1KYHfnOzacj/0A5th88ivtHNti0/3ZZP5zQsaXhMh65tB9aNU3DxUGTVqb//nRc+doyjOjbCdV1Hkw4pyf6HNsKt/9XecbuW7/LxvnPf6daxrjTu2NWzm4A3gG7zFZNcN2QLKSnEn535nEYPyQL7/20E+NO746vNuxHu+ahA3e/9Y1FXDWoKz5ZtSdk25SrT8VN03/G5v1luOGMLCzeXIw9JZWG2iCFgEFZbbFqV0kg7ZzeHeBhjpiJnJZCeGncALy9ZAeKy6oxb/2+kO1N01NQVesJ9IRvObsn9h2pBBFhwtkNM1nfv3UoNoeFvtDLt/cNxwX//B5zJw0DAHxw61B8uWEfmqaloldmyxAvHr+55s0bByN3bylKK2sx+Dj12ZpXD+qGDXtKcf/IPujYqimGn5iJPp1a4dRubdC/i3579gu/OS3iLeKB0X2jhs2Ol/5dWuOWYT1RU1+Py07tEn0HGxnZ71hcPzQL91xoTSTIzFZNcNvw43HNIOUJUB/fEbm0ol1QNNcyIloIQGnq3EPMPNeX5yEA2QCuYmYmolcBLGfm//q2TwPwpW+Bb1Wys7M5JycnhtMwv8euFF7YSvz11ypXLc91b/2EH/OVQzMsum84RvzzewBA7uMXo4XGgKVWOVv2l+Hil34IfP9o4hk44/jQeOQPfLIeH/68K+rxg3nwkr6Yn1uElUELny/5y/no3r45nvh8I6Yv2xFIX/fYyMAA9p9nrsGc1Q0PnYIpY3Dus4ux69cKfHf/eegRNHhpJ3//YiPeXroDD17SFxM1QgIIjQM997UZENFKZs5W2hb1jmfmC6Mc/CYAlwIYwQ1PjEIAwe4o3QDs1VddwUyC33JiNR8AkeYppb5BLC9UBFIdFA0fBA4eyFQyl5g56SZe3DKmIwjxeteMAvBXAJczc0XQps8AjCOiJkTUE0BvAD/HU5agjtbLWLAY6h1UVcIqySKKnI3qP59w75Tg70rjAYFAWA4KbAI5wwiN
hHht8q8AaAJggc8O+hMz387MuUQ0C8BGAHUA7mLm+jjLElRQWoLQT0oKISMtBTV1nrhmQ4b3nJWO1SSGoGnpqSloEibm/p5484zQyzPY3VIpSJV/PoCTs3b9fujpDkcTFQQ/cYk8M6tO62LmpwA8Fc/xjXDVwK74ZPUe1e0tm6ShPCjsQTjTbsrGhBk5mHB2T0fC7b7429PQqXVTzTwf3DoUB8qqI9KfueZU/HtRHpqmp+BAWTVq6jwY0rM9dh6qQOfWTfH5pLOxZFuxbte8Ry/thzN7hdrbe3ZogXsvOhHn9O6ARZsO4PSgWZl+7r3oRGSkpuD4zBbIat8CB8urcfeHqzH7Dm/UxatfX46zeh2DH/MP4f6RJ6Ksqg7jhnTHyP6d8OHPu3Ht4G6Ys3pPIKzvzcN6oKSiBnUeRn5xuWJZg45rB4/vVWDaTadj7po9IbNa7ebuC04AM2P80NicCITk4smx/TEgjtDIZhB14NVO4hl4FQRBaKxoDbwmqEeuIAiCoAcReUEQhCRGRF4QBCGJEZEXBEFIYkTkBUEQkhgReUEQhCRGRF4QBCGJEZEXBEFIYlw1GYqIigHsjOMQHQAcNKk6iYq0gRdpB2kDP42hHY5j5kylDa4S+Xghohy1WV+NBWkDL9IO0gZ+Gns7iLlGEAQhiRGRFwRBSGKSTeSnOl0BFyBt4EXaQdrAT6Nuh6SyyQuCIAihJFtPXhAEQQhCRF4QBCGJSQqRJ6JRRLSFiPKIaLLT9bESIiogovVEtIaIcnxp7YloARFt8/1vF5T/AV+7bCGii52reXwQ0XQiOkBEG4LSDJ83EQ32tV8eEf2L9C6X5RJU2uExItrjuybWENElQduSrh2IqDsRLSaiTUSUS0R/8qU3uutBF8yc0H8AUgHkAzgeQAaAtQD6OV0vC8+3AECHsLRnAUz2fZ4M4Bnf536+9mgCoKevnVKdPocYz/tcAIMAbIjnvOFdUP5MeNcm/wrAaKfPzYR2eAzA/Qp5k7IdAHQGMMj3uRWArb5zbXTXg56/ZOjJDwGQx8zbmbkGwEcAxjpcJ7sZC2CG7/MMAFcEpX/EzNXMvANAHrztlXAw8w8Afg1LNnTeRNQZQGtmXs7eO/w/QfskBCrtoEZStgMz72PmVb7PZQA2AeiKRng96CEZRL4rgN1B3wt9ackKA/iGiFYS0URfWidm3gd4bwAAHX3pyd42Rs+7q+9zeHoyMImI1vnMOX4zRdK3AxH1ADAQwArI9aBIMoi8kg0tmf1ChzHzIACjAdxFROdq5G1sbeNH7byTtT1eB9ALwAAA+wD805ee1O1ARC0BzAZwDzOXamVVSEuadohGMoh8IYDuQd+7AdjrUF0sh5n3+v4fADAHXvNLke/VE77/B3zZk71tjJ53oe9zeHpCw8xFzFzPzB4Ab6HBJJe07UBE6fAK/PvM/IkvWa4HBZJB5H8B0JuIehJRBoBxAD5zuE6WQEQtiKiV/zOAkQA2wHu+N/my3QRgru/zZwDGEVETIuoJoDe8A03JgqHz9r3ClxHRGT4vit8F7ZOw+IXNx5XwXhNAkraDr87TAGxi5heCNsn1oITTI79m/AG4BN4R9nwADzldHwvP83h4vQTWAsj1nyuAYwAsArDN97990D4P+dplCxLYcwDAh/CaImrh7YFNiOW8AWTDK4L5AF6Bb9Z3ovyptMN7ANYDWAevoHVO5nYAcDa8ZpV1ANb4/i5pjNeDnj8JayAIgpDEJIO5RhAEQVBBRF4QBCGJEZEXBEFIYkTkBUEQkhgReUEQhCRGRF4QBCGJEZEXBEFIYv4f129cJf25/o4AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps\n",
    "\n",
    "\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    if np.mean(episode_rewards[-3:]) > 16.:\n",
    "        break\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
